mirror of https://github.com/CIRCL/PyCIRCLean
Change various methods to properties
parent
59cde8cfd5
commit
963a2feef4
|
@ -120,9 +120,9 @@ class File(FileBase):
|
||||||
}
|
}
|
||||||
|
|
||||||
def _check_dangerous(self):
|
def _check_dangerous(self):
|
||||||
if not self.has_mimetype():
|
if not self.has_mimetype:
|
||||||
self.make_dangerous('no mimetype')
|
self.make_dangerous('no mimetype')
|
||||||
if not self.has_extension():
|
if not self.has_extension:
|
||||||
self.make_dangerous('no extension')
|
self.make_dangerous('no extension')
|
||||||
if self.extension in Config.malicious_exts:
|
if self.extension in Config.malicious_exts:
|
||||||
self.make_dangerous('malicious_extension')
|
self.make_dangerous('malicious_extension')
|
||||||
|
@ -156,17 +156,17 @@ class File(FileBase):
|
||||||
expected_extensions = mimetypes.guess_all_extensions(mimetype,
|
expected_extensions = mimetypes.guess_all_extensions(mimetype,
|
||||||
strict=False)
|
strict=False)
|
||||||
if expected_extensions:
|
if expected_extensions:
|
||||||
if self.has_extension() and self.extension not in expected_extensions:
|
if self.has_extension and self.extension not in expected_extensions:
|
||||||
# LOG: improve this string
|
# LOG: improve this string
|
||||||
self.make_dangerous('expected extensions')
|
self.make_dangerous('expected extensions')
|
||||||
|
|
||||||
def check(self):
|
def check(self):
|
||||||
self._check_dangerous()
|
self._check_dangerous()
|
||||||
if self.has_extension():
|
if self.has_extension:
|
||||||
self._check_extension()
|
self._check_extension()
|
||||||
if self.has_mimetype():
|
if self.has_mimetype:
|
||||||
self._check_mimetype()
|
self._check_mimetype()
|
||||||
if not self.is_dangerous():
|
if not self.is_dangerous:
|
||||||
self.mime_processing_options.get(self.main_type, self.unknown)()
|
self.mime_processing_options.get(self.main_type, self.unknown)()
|
||||||
|
|
||||||
# ##### Helper functions #####
|
# ##### Helper functions #####
|
||||||
|
@ -178,11 +178,6 @@ class File(FileBase):
|
||||||
dict_to_return[subtype] = method
|
dict_to_return[subtype] = method
|
||||||
return dict_to_return
|
return dict_to_return
|
||||||
|
|
||||||
def write_log(self):
|
|
||||||
"""Print the logs related to the current file being processed."""
|
|
||||||
# LOG: move to helpers.py GroomerLogger and modify
|
|
||||||
tmp_log = self.logger.log.fields(**self._file_props)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def has_metadata(self):
|
def has_metadata(self):
|
||||||
if self.mimetype in Config.mimes_metadata:
|
if self.mimetype in Config.mimes_metadata:
|
||||||
|
@ -200,7 +195,7 @@ class File(FileBase):
|
||||||
# ##### Discarded mimetypes, reason in the docstring ######
|
# ##### Discarded mimetypes, reason in the docstring ######
|
||||||
def inode(self):
|
def inode(self):
|
||||||
"""Empty file or symlink."""
|
"""Empty file or symlink."""
|
||||||
if self.is_symlink():
|
if self.is_symlink:
|
||||||
symlink_path = self.get_property('symlink')
|
symlink_path = self.get_property('symlink')
|
||||||
self.add_file_string('Symlink to {}'.format(symlink_path))
|
self.add_file_string('Symlink to {}'.format(symlink_path))
|
||||||
else:
|
else:
|
||||||
|
@ -479,19 +474,19 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
||||||
|
|
||||||
def process_dir(self, src_dir, dst_dir):
|
def process_dir(self, src_dir, dst_dir):
|
||||||
"""Main function coordinating file processing."""
|
"""Main function coordinating file processing."""
|
||||||
# LOG: we probably want to move this write_log elsewhere:
|
# LOG: what's the purpose of this write log?:
|
||||||
# if self.recursive_archive_depth > 0:
|
# if self.recursive_archive_depth > 0:
|
||||||
# self.write_log()
|
# self.write_log()
|
||||||
# TODO: Can we clean up the way we handle relative_path?
|
# We're writing the log here because...
|
||||||
|
# How exactly does the workflow work with an archive?
|
||||||
for srcpath in self.list_all_files(src_dir):
|
for srcpath in self.list_all_files(src_dir):
|
||||||
dstpath = srcpath.replace(src_dir, dst_dir)
|
dstpath = srcpath.replace(src_dir, dst_dir)
|
||||||
relative_path = srcpath.replace(src_dir + '/', '')
|
# TODO: Can we clean up the way we handle relative_path?
|
||||||
|
# Relative path is here so that when we print files in the log it
|
||||||
|
# shows only the file's path. Should we just pass it to the logger
|
||||||
|
# when we create it? Or let the logger figure it out?
|
||||||
|
# relative_path = srcpath.replace(src_dir + '/', '')
|
||||||
self.cur_file = File(srcpath, dstpath, self.logger)
|
self.cur_file = File(srcpath, dstpath, self.logger)
|
||||||
# LOG: move this logging code elsewhere
|
|
||||||
self.logger.log.info('Processing {} ({}/{})',
|
|
||||||
relative_path,
|
|
||||||
self.cur_file.main_type,
|
|
||||||
self.cur_file.sub_type)
|
|
||||||
self.process_file(self.cur_file)
|
self.process_file(self.cur_file)
|
||||||
|
|
||||||
def process_file(self, file):
|
def process_file(self, file):
|
||||||
|
|
|
@ -112,6 +112,7 @@ class FileBase(object):
|
||||||
size = 0
|
size = 0
|
||||||
return size
|
return size
|
||||||
|
|
||||||
|
@property
|
||||||
def has_mimetype(self):
|
def has_mimetype(self):
|
||||||
"""Returns True if file has a full mimetype, else False."""
|
"""Returns True if file has a full mimetype, else False."""
|
||||||
if not self.main_type or not self.sub_type:
|
if not self.main_type or not self.sub_type:
|
||||||
|
@ -121,6 +122,7 @@ class FileBase(object):
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@property
|
||||||
def has_extension(self):
|
def has_extension(self):
|
||||||
"""Returns True if self.extension is set, else False."""
|
"""Returns True if self.extension is set, else False."""
|
||||||
if self.extension is None:
|
if self.extension is None:
|
||||||
|
@ -130,18 +132,22 @@ class FileBase(object):
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@property
|
||||||
def is_dangerous(self):
|
def is_dangerous(self):
|
||||||
"""True if file has been marked 'dangerous' else False."""
|
"""True if file has been marked 'dangerous' else False."""
|
||||||
return self._file_props['safety_category'] is 'dangerous'
|
return self._file_props['safety_category'] is 'dangerous'
|
||||||
|
|
||||||
|
@property
|
||||||
def is_unknown(self):
|
def is_unknown(self):
|
||||||
"""True if file has been marked 'unknown' else False."""
|
"""True if file has been marked 'unknown' else False."""
|
||||||
return self._file_props['safety_category'] is 'unknown'
|
return self._file_props['safety_category'] is 'unknown'
|
||||||
|
|
||||||
|
@property
|
||||||
def is_binary(self):
|
def is_binary(self):
|
||||||
"""True if file has been marked 'binary' else False."""
|
"""True if file has been marked 'binary' else False."""
|
||||||
return self._file_props['safety_category'] is 'binary'
|
return self._file_props['safety_category'] is 'binary'
|
||||||
|
|
||||||
|
@property
|
||||||
def is_symlink(self):
|
def is_symlink(self):
|
||||||
"""Returns True and updates log if file is a symlink."""
|
"""Returns True and updates log if file is a symlink."""
|
||||||
if self._file_props['symlink'] is False:
|
if self._file_props['symlink'] is False:
|
||||||
|
@ -178,7 +184,7 @@ class FileBase(object):
|
||||||
Prepends and appends DANGEROUS to the destination file name
|
Prepends and appends DANGEROUS to the destination file name
|
||||||
to help prevent double-click of death.
|
to help prevent double-click of death.
|
||||||
"""
|
"""
|
||||||
if self.is_dangerous():
|
if self.is_dangerous:
|
||||||
return
|
return
|
||||||
self.set_property('safety_category', 'dangerous')
|
self.set_property('safety_category', 'dangerous')
|
||||||
# LOG: store reason string
|
# LOG: store reason string
|
||||||
|
@ -187,7 +193,7 @@ class FileBase(object):
|
||||||
|
|
||||||
def make_unknown(self):
|
def make_unknown(self):
|
||||||
"""Marks a file as an unknown type and prepends UNKNOWN to filename."""
|
"""Marks a file as an unknown type and prepends UNKNOWN to filename."""
|
||||||
if self.is_dangerous() or self.is_binary():
|
if self.is_dangerous or self.is_binary:
|
||||||
return
|
return
|
||||||
self.set_property('safety_category', 'unknown')
|
self.set_property('safety_category', 'unknown')
|
||||||
path, filename = os.path.split(self.dst_path)
|
path, filename = os.path.split(self.dst_path)
|
||||||
|
@ -195,7 +201,7 @@ class FileBase(object):
|
||||||
|
|
||||||
def make_binary(self):
|
def make_binary(self):
|
||||||
"""Marks a file as a binary and appends .bin to filename."""
|
"""Marks a file as a binary and appends .bin to filename."""
|
||||||
if self.is_dangerous():
|
if self.is_dangerous:
|
||||||
return
|
return
|
||||||
self.set_property('safety_category', 'binary')
|
self.set_property('safety_category', 'binary')
|
||||||
path, filename = os.path.split(self.dst_path)
|
path, filename = os.path.split(self.dst_path)
|
||||||
|
@ -242,6 +248,10 @@ class FileBase(object):
|
||||||
self.add_error(e, '')
|
self.add_error(e, '')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def write_log(self):
|
||||||
|
"""Print the logs related to the current file being processed."""
|
||||||
|
tmp_log = self.logger.log.fields(**self._file_props)
|
||||||
|
|
||||||
|
|
||||||
class GroomerLogger(object):
|
class GroomerLogger(object):
|
||||||
"""Groomer logging interface"""
|
"""Groomer logging interface"""
|
||||||
|
@ -291,7 +301,6 @@ class GroomerLogger(object):
|
||||||
return s.hexdigest()
|
return s.hexdigest()
|
||||||
|
|
||||||
def add_file(self, file):
|
def add_file(self, file):
|
||||||
# File object will add itself?
|
|
||||||
# return a sublog for the file
|
# return a sublog for the file
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
@ -104,23 +104,23 @@ class TestFileBase:
|
||||||
assert generic_conf_file.mimetype == 'text/plain'
|
assert generic_conf_file.mimetype == 'text/plain'
|
||||||
assert generic_conf_file.main_type == 'text'
|
assert generic_conf_file.main_type == 'text'
|
||||||
assert generic_conf_file.sub_type == 'plain'
|
assert generic_conf_file.sub_type == 'plain'
|
||||||
assert generic_conf_file.has_mimetype()
|
assert generic_conf_file.has_mimetype
|
||||||
# Need to test something without a mimetype
|
# Need to test something without a mimetype
|
||||||
# Need to test something that's a directory
|
# Need to test something that's a directory
|
||||||
# Need to test something that causes the unicode exception
|
# Need to test something that causes the unicode exception
|
||||||
|
|
||||||
def test_has_mimetype_no_main_type(self, generic_conf_file):
|
def test_has_mimetype_no_main_type(self, generic_conf_file):
|
||||||
generic_conf_file.main_type = ''
|
generic_conf_file.main_type = ''
|
||||||
assert generic_conf_file.has_mimetype() is False
|
assert generic_conf_file.has_mimetype is False
|
||||||
|
|
||||||
def test_has_mimetype_no_sub_type(self, generic_conf_file):
|
def test_has_mimetype_no_sub_type(self, generic_conf_file):
|
||||||
generic_conf_file.sub_type = ''
|
generic_conf_file.sub_type = ''
|
||||||
assert generic_conf_file.has_mimetype() is False
|
assert generic_conf_file.has_mimetype is False
|
||||||
|
|
||||||
def test_has_extension(self, temp_file, temp_file_no_ext):
|
def test_has_extension(self, temp_file, temp_file_no_ext):
|
||||||
assert temp_file.has_extension() is True
|
assert temp_file.has_extension is True
|
||||||
print(temp_file_no_ext.extension)
|
print(temp_file_no_ext.extension)
|
||||||
assert temp_file_no_ext.has_extension() is False
|
assert temp_file_no_ext.has_extension is False
|
||||||
|
|
||||||
def test_set_property(self, generic_conf_file):
|
def test_set_property(self, generic_conf_file):
|
||||||
generic_conf_file.set_property('test', True)
|
generic_conf_file.set_property('test', True)
|
||||||
|
@ -129,14 +129,14 @@ class TestFileBase:
|
||||||
|
|
||||||
def test_marked_dangerous(self, file_marked_all_parameterized):
|
def test_marked_dangerous(self, file_marked_all_parameterized):
|
||||||
file_marked_all_parameterized.make_dangerous()
|
file_marked_all_parameterized.make_dangerous()
|
||||||
assert file_marked_all_parameterized.is_dangerous() is True
|
assert file_marked_all_parameterized.is_dangerous is True
|
||||||
# Should work regardless of weird paths??
|
# Should work regardless of weird paths??
|
||||||
# Should check file path alteration behavior as well
|
# Should check file path alteration behavior as well
|
||||||
|
|
||||||
def test_generic_dangerous(self, generic_conf_file):
|
def test_generic_dangerous(self, generic_conf_file):
|
||||||
assert generic_conf_file.is_dangerous() is False
|
assert generic_conf_file.is_dangerous is False
|
||||||
generic_conf_file.make_dangerous()
|
generic_conf_file.make_dangerous()
|
||||||
assert generic_conf_file.is_dangerous() is True
|
assert generic_conf_file.is_dangerous is True
|
||||||
|
|
||||||
def test_has_symlink(self, tmpdir):
|
def test_has_symlink(self, tmpdir):
|
||||||
file_path = tmpdir.join('test.txt')
|
file_path = tmpdir.join('test.txt')
|
||||||
|
@ -147,45 +147,45 @@ class TestFileBase:
|
||||||
os.symlink(file_path, symlink_path)
|
os.symlink(file_path, symlink_path)
|
||||||
file = FileBase(file_path, file_path)
|
file = FileBase(file_path, file_path)
|
||||||
symlink = FileBase(symlink_path, symlink_path)
|
symlink = FileBase(symlink_path, symlink_path)
|
||||||
assert file.is_symlink() is False
|
assert file.is_symlink is False
|
||||||
assert symlink.is_symlink() is True
|
assert symlink.is_symlink is True
|
||||||
|
|
||||||
def test_has_symlink_fixture(self, symlink_file):
|
def test_has_symlink_fixture(self, symlink_file):
|
||||||
assert symlink_file.is_symlink() is True
|
assert symlink_file.is_symlink is True
|
||||||
|
|
||||||
def test_generic_make_unknown(self, generic_conf_file):
|
def test_generic_make_unknown(self, generic_conf_file):
|
||||||
assert generic_conf_file.is_unknown() is False
|
assert generic_conf_file.is_unknown is False
|
||||||
generic_conf_file.make_unknown()
|
generic_conf_file.make_unknown()
|
||||||
assert generic_conf_file.is_unknown()
|
assert generic_conf_file.is_unknown
|
||||||
# given a FileBase object with no marking, should do the right things
|
# given a FileBase object with no marking, should do the right things
|
||||||
|
|
||||||
def test_marked_make_unknown(self, file_marked_all_parameterized):
|
def test_marked_make_unknown(self, file_marked_all_parameterized):
|
||||||
file = file_marked_all_parameterized
|
file = file_marked_all_parameterized
|
||||||
if file.is_unknown():
|
if file.is_unknown:
|
||||||
file.make_unknown()
|
file.make_unknown()
|
||||||
assert file.is_unknown()
|
assert file.is_unknown
|
||||||
else:
|
else:
|
||||||
assert file.is_unknown() is False
|
assert file.is_unknown is False
|
||||||
file.make_unknown()
|
file.make_unknown()
|
||||||
assert file.is_unknown() is False
|
assert file.is_unknown is False
|
||||||
# given a FileBase object with an unrecognized marking, should ???
|
# given a FileBase object with an unrecognized marking, should ???
|
||||||
|
|
||||||
def test_generic_make_binary(self, generic_conf_file):
|
def test_generic_make_binary(self, generic_conf_file):
|
||||||
assert generic_conf_file.is_binary() is False
|
assert generic_conf_file.is_binary is False
|
||||||
generic_conf_file.make_binary()
|
generic_conf_file.make_binary()
|
||||||
assert generic_conf_file.is_binary() is True
|
assert generic_conf_file.is_binary
|
||||||
|
|
||||||
def test_marked_make_binary(self, file_marked_all_parameterized):
|
def test_marked_make_binary(self, file_marked_all_parameterized):
|
||||||
file = file_marked_all_parameterized
|
file = file_marked_all_parameterized
|
||||||
if file.is_dangerous():
|
if file.is_dangerous:
|
||||||
file.make_binary()
|
file.make_binary()
|
||||||
assert file.is_binary() is False
|
assert file.is_binary is False
|
||||||
else:
|
else:
|
||||||
file.make_binary()
|
file.make_binary()
|
||||||
assert file.is_binary()
|
assert file.is_binary
|
||||||
|
|
||||||
def test_force_ext_change(self, generic_conf_file):
|
def test_force_ext_change(self, generic_conf_file):
|
||||||
assert generic_conf_file.has_extension()
|
assert generic_conf_file.has_extension
|
||||||
assert generic_conf_file.get_property('extension') == '.conf'
|
assert generic_conf_file.get_property('extension') == '.conf'
|
||||||
assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
|
assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
|
||||||
generic_conf_file.force_ext('.txt')
|
generic_conf_file.force_ext('.txt')
|
||||||
|
@ -195,7 +195,7 @@ class TestFileBase:
|
||||||
# should be able to handle weird paths
|
# should be able to handle weird paths
|
||||||
|
|
||||||
def test_force_ext_correct(self, generic_conf_file):
|
def test_force_ext_correct(self, generic_conf_file):
|
||||||
assert generic_conf_file.has_extension()
|
assert generic_conf_file.has_extension
|
||||||
assert generic_conf_file.get_property('extension') == '.conf'
|
assert generic_conf_file.get_property('extension') == '.conf'
|
||||||
generic_conf_file.force_ext('.conf')
|
generic_conf_file.force_ext('.conf')
|
||||||
assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
|
assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
|
||||||
|
@ -212,6 +212,10 @@ class TestFileBase:
|
||||||
# if metadata file already exists
|
# if metadata file already exists
|
||||||
# if there is no metadata to write should this work?
|
# if there is no metadata to write should this work?
|
||||||
|
|
||||||
|
def test_safe_copy(self, generic_conf_file):
|
||||||
|
generic_conf_file.safe_copy()
|
||||||
|
# check that safe copy can handle weird file path inputs
|
||||||
|
|
||||||
|
|
||||||
class TestLogger:
|
class TestLogger:
|
||||||
|
|
||||||
|
@ -246,17 +250,6 @@ class TestKittenGroomerBase:
|
||||||
dest_directory,
|
dest_directory,
|
||||||
debug=True)
|
debug=True)
|
||||||
|
|
||||||
def test_safe_copy(self, tmpdir):
|
|
||||||
file = tmpdir.join('test.txt')
|
|
||||||
file.write('testing')
|
|
||||||
testdir = tmpdir.join('testdir')
|
|
||||||
os.mkdir(testdir.strpath)
|
|
||||||
filedest = testdir.join('test.txt')
|
|
||||||
simple_groomer = KittenGroomerBase(tmpdir.strpath, testdir.strpath)
|
|
||||||
simple_groomer.cur_file = FileBase(file.strpath, filedest.strpath)
|
|
||||||
simple_groomer.safe_copy()
|
|
||||||
# check that safe copy can handle weird file path inputs
|
|
||||||
|
|
||||||
def test_list_all_files(self, tmpdir):
|
def test_list_all_files(self, tmpdir):
|
||||||
file = tmpdir.join('test.txt')
|
file = tmpdir.join('test.txt')
|
||||||
file.write('testing')
|
file.write('testing')
|
||||||
|
|
Loading…
Reference in New Issue