diff --git a/bin/filecheck.py b/bin/filecheck.py index 157c83f..8af9c2c 100644 --- a/bin/filecheck.py +++ b/bin/filecheck.py @@ -120,9 +120,9 @@ class File(FileBase): } def _check_dangerous(self): - if not self.has_mimetype(): + if not self.has_mimetype: self.make_dangerous('no mimetype') - if not self.has_extension(): + if not self.has_extension: self.make_dangerous('no extension') if self.extension in Config.malicious_exts: self.make_dangerous('malicious_extension') @@ -156,17 +156,17 @@ class File(FileBase): expected_extensions = mimetypes.guess_all_extensions(mimetype, strict=False) if expected_extensions: - if self.has_extension() and self.extension not in expected_extensions: + if self.has_extension and self.extension not in expected_extensions: # LOG: improve this string self.make_dangerous('expected extensions') def check(self): self._check_dangerous() - if self.has_extension(): + if self.has_extension: self._check_extension() - if self.has_mimetype(): + if self.has_mimetype: self._check_mimetype() - if not self.is_dangerous(): + if not self.is_dangerous: self.mime_processing_options.get(self.main_type, self.unknown)() # ##### Helper functions ##### @@ -178,11 +178,6 @@ class File(FileBase): dict_to_return[subtype] = method return dict_to_return - def write_log(self): - """Print the logs related to the current file being processed.""" - # LOG: move to helpers.py GroomerLogger and modify - tmp_log = self.logger.log.fields(**self._file_props) - @property def has_metadata(self): if self.mimetype in Config.mimes_metadata: @@ -200,7 +195,7 @@ class File(FileBase): # ##### Discarded mimetypes, reason in the docstring ###### def inode(self): """Empty file or symlink.""" - if self.is_symlink(): + if self.is_symlink: symlink_path = self.get_property('symlink') self.add_file_string('Symlink to {}'.format(symlink_path)) else: @@ -479,19 +474,19 @@ class KittenGroomerFileCheck(KittenGroomerBase): def process_dir(self, src_dir, dst_dir): """Main function coordinating file processing.""" - # LOG: we probably want to move this write_log elsewhere: + # LOG: what's the purpose of this write log?: # if self.recursive_archive_depth > 0: # self.write_log() - # TODO: Can we clean up the way we handle relative_path? + # We're writing the log here because... + # How exactly does the workflow work with an archive? for srcpath in self.list_all_files(src_dir): dstpath = srcpath.replace(src_dir, dst_dir) - relative_path = srcpath.replace(src_dir + '/', '') + # TODO: Can we clean up the way we handle relative_path? + # Relative path is here so that when we print files in the log it + # shows only the file's path. Should we just pass it to the logger + # when we create it? Or let the logger figure it out? + # relative_path = srcpath.replace(src_dir + '/', '') self.cur_file = File(srcpath, dstpath, self.logger) - # LOG: move this logging code elsewhere - self.logger.log.info('Processing {} ({}/{})', - relative_path, - self.cur_file.main_type, - self.cur_file.sub_type) self.process_file(self.cur_file) def process_file(self, file): diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py index f422be5..9f80b72 100644 --- a/kittengroomer/helpers.py +++ b/kittengroomer/helpers.py @@ -112,6 +112,7 @@ class FileBase(object): size = 0 return size + @property def has_mimetype(self): """Returns True if file has a full mimetype, else False.""" if not self.main_type or not self.sub_type: @@ -121,6 +122,7 @@ class FileBase(object): else: return True + @property def has_extension(self): """Returns True if self.extension is set, else False.""" if self.extension is None: @@ -130,18 +132,22 @@ class FileBase(object): else: return True + @property def is_dangerous(self): """True if file has been marked 'dangerous' else False.""" return self._file_props['safety_category'] is 'dangerous' + @property def is_unknown(self): """True if file has been marked 'unknown' else False.""" return self._file_props['safety_category'] is 'unknown' + @property def is_binary(self): """True if file has been marked 'binary' else False.""" return self._file_props['safety_category'] is 'binary' + @property def is_symlink(self): """Returns True and updates log if file is a symlink.""" if self._file_props['symlink'] is False: @@ -178,7 +184,7 @@ class FileBase(object): Prepends and appends DANGEROUS to the destination file name to help prevent double-click of death. """ - if self.is_dangerous(): + if self.is_dangerous: return self.set_property('safety_category', 'dangerous') # LOG: store reason string @@ -187,7 +193,7 @@ class FileBase(object): def make_unknown(self): """Marks a file as an unknown type and prepends UNKNOWN to filename.""" - if self.is_dangerous() or self.is_binary(): + if self.is_dangerous or self.is_binary: return self.set_property('safety_category', 'unknown') path, filename = os.path.split(self.dst_path) @@ -195,7 +201,7 @@ class FileBase(object): def make_binary(self): """Marks a file as a binary and appends .bin to filename.""" - if self.is_dangerous(): + if self.is_dangerous: return self.set_property('safety_category', 'binary') path, filename = os.path.split(self.dst_path) @@ -242,6 +248,10 @@ class FileBase(object): self.add_error(e, '') return False + def write_log(self): + """Print the logs related to the current file being processed.""" + tmp_log = self.logger.log.fields(**self._file_props) + class GroomerLogger(object): """Groomer logging interface""" @@ -291,7 +301,6 @@ class GroomerLogger(object): return s.hexdigest() def add_file(self, file): - # File object will add itself? # return a sublog for the file pass diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py index 41f9a40..940136d 100644 --- a/tests/test_kittengroomer.py +++ b/tests/test_kittengroomer.py @@ -104,23 +104,23 @@ class TestFileBase: assert generic_conf_file.mimetype == 'text/plain' assert generic_conf_file.main_type == 'text' assert generic_conf_file.sub_type == 'plain' - assert generic_conf_file.has_mimetype() + assert generic_conf_file.has_mimetype # Need to test something without a mimetype # Need to test something that's a directory # Need to test something that causes the unicode exception def test_has_mimetype_no_main_type(self, generic_conf_file): generic_conf_file.main_type = '' - assert generic_conf_file.has_mimetype() is False + assert generic_conf_file.has_mimetype is False def test_has_mimetype_no_sub_type(self, generic_conf_file): generic_conf_file.sub_type = '' - assert generic_conf_file.has_mimetype() is False + assert generic_conf_file.has_mimetype is False def test_has_extension(self, temp_file, temp_file_no_ext): - assert temp_file.has_extension() is True + assert temp_file.has_extension is True print(temp_file_no_ext.extension) - assert temp_file_no_ext.has_extension() is False + assert temp_file_no_ext.has_extension is False def test_set_property(self, generic_conf_file): generic_conf_file.set_property('test', True) @@ -129,14 +129,14 @@ class TestFileBase: def test_marked_dangerous(self, file_marked_all_parameterized): file_marked_all_parameterized.make_dangerous() - assert file_marked_all_parameterized.is_dangerous() is True + assert file_marked_all_parameterized.is_dangerous is True # Should work regardless of weird paths?? # Should check file path alteration behavior as well def test_generic_dangerous(self, generic_conf_file): - assert generic_conf_file.is_dangerous() is False + assert generic_conf_file.is_dangerous is False generic_conf_file.make_dangerous() - assert generic_conf_file.is_dangerous() is True + assert generic_conf_file.is_dangerous is True def test_has_symlink(self, tmpdir): file_path = tmpdir.join('test.txt') @@ -147,45 +147,45 @@ class TestFileBase: os.symlink(file_path, symlink_path) file = FileBase(file_path, file_path) symlink = FileBase(symlink_path, symlink_path) - assert file.is_symlink() is False - assert symlink.is_symlink() is True + assert file.is_symlink is False + assert symlink.is_symlink is True def test_has_symlink_fixture(self, symlink_file): - assert symlink_file.is_symlink() is True + assert symlink_file.is_symlink is True def test_generic_make_unknown(self, generic_conf_file): - assert generic_conf_file.is_unknown() is False + assert generic_conf_file.is_unknown is False generic_conf_file.make_unknown() - assert generic_conf_file.is_unknown() + assert generic_conf_file.is_unknown # given a FileBase object with no marking, should do the right things def test_marked_make_unknown(self, file_marked_all_parameterized): file = file_marked_all_parameterized - if file.is_unknown(): + if file.is_unknown: file.make_unknown() - assert file.is_unknown() + assert file.is_unknown else: - assert file.is_unknown() is False + assert file.is_unknown is False file.make_unknown() - assert file.is_unknown() is False + assert file.is_unknown is False # given a FileBase object with an unrecognized marking, should ??? def test_generic_make_binary(self, generic_conf_file): - assert generic_conf_file.is_binary() is False + assert generic_conf_file.is_binary is False generic_conf_file.make_binary() - assert generic_conf_file.is_binary() is True + assert generic_conf_file.is_binary def test_marked_make_binary(self, file_marked_all_parameterized): file = file_marked_all_parameterized - if file.is_dangerous(): + if file.is_dangerous: file.make_binary() - assert file.is_binary() is False + assert file.is_binary is False else: file.make_binary() - assert file.is_binary() + assert file.is_binary def test_force_ext_change(self, generic_conf_file): - assert generic_conf_file.has_extension() + assert generic_conf_file.has_extension assert generic_conf_file.get_property('extension') == '.conf' assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf' generic_conf_file.force_ext('.txt') @@ -195,7 +195,7 @@ class TestFileBase: # should be able to handle weird paths def test_force_ext_correct(self, generic_conf_file): - assert generic_conf_file.has_extension() + assert generic_conf_file.has_extension assert generic_conf_file.get_property('extension') == '.conf' generic_conf_file.force_ext('.conf') assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf' @@ -212,6 +212,10 @@ class TestFileBase: # if metadata file already exists # if there is no metadata to write should this work? + def test_safe_copy(self, generic_conf_file): + generic_conf_file.safe_copy() + # check that safe copy can handle weird file path inputs + class TestLogger: @@ -246,17 +250,6 @@ class TestKittenGroomerBase: dest_directory, debug=True) - def test_safe_copy(self, tmpdir): - file = tmpdir.join('test.txt') - file.write('testing') - testdir = tmpdir.join('testdir') - os.mkdir(testdir.strpath) - filedest = testdir.join('test.txt') - simple_groomer = KittenGroomerBase(tmpdir.strpath, testdir.strpath) - simple_groomer.cur_file = FileBase(file.strpath, filedest.strpath) - simple_groomer.safe_copy() - # check that safe copy can handle weird file path inputs - def test_list_all_files(self, tmpdir): file = tmpdir.join('test.txt') file.write('testing')