From 146f258e54c27b2c6d30c2d4b586b40e71abc2b1 Mon Sep 17 00:00:00 2001 From: Dan Puttick Date: Wed, 30 Nov 2016 21:04:59 -0500 Subject: [PATCH 1/8] Refactored mimetype code into its own method --- kittengroomer/helpers.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py index 3a20a74..0461d0e 100644 --- a/kittengroomer/helpers.py +++ b/kittengroomer/helpers.py @@ -2,12 +2,13 @@ # -*- coding: utf-8 -*- import os import sys -import magic import hashlib import shutil -from twiggy import quick_setup, log import argparse +import magic +from twiggy import quick_setup, log + class KittenGroomerError(Exception): def __init__(self, message): @@ -37,7 +38,9 @@ class FileBase(object): self.log_details = {'filepath': self.src_path} self.log_string = '' a, self.extension = os.path.splitext(self.src_path) + self._get_mimetype() + def _get_mimetype(self): if os.path.islink(self.src_path): # magic will throw an IOError on a broken symlink self.mimetype = 'inode/symlink' From 62a7f680b4abf1010f700d29e5bab3e361a6eb17 Mon Sep 17 00:00:00 2001 From: Dan Puttick Date: Mon, 5 Dec 2016 21:02:46 -0500 Subject: [PATCH 2/8] Added/changed docstrings to FileBase --- kittengroomer/helpers.py | 77 +++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py index 0461d0e..f01a073 100644 --- a/kittengroomer/helpers.py +++ b/kittengroomer/helpers.py @@ -1,5 +1,13 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- + +""" +Contains the base objects for use when creating a sanitizer using +PyCIRCLean. Subclass FileBase and KittenGroomerBase to implement your +desired behavior. +""" + + import os import sys import hashlib @@ -11,36 +19,36 @@ from twiggy import quick_setup, log class KittenGroomerError(Exception): + """Base KittenGroomer exception handler.""" + def __init__(self, message): - ''' - Base KittenGroomer exception handler. - ''' super(KittenGroomerError, self).__init__(message) self.message = message class ImplementationRequired(KittenGroomerError): - ''' - Implementation required error - ''' + """Implementation required error.""" + pass class FileBase(object): + """ + Base object for individual files in the source directory. Has information + about the file as attributes and various helper methods. Initialised with + the source path and expected destination path. Subclass and add attributes + or methods relevant to a given implementation." + """ def __init__(self, src_path, dst_path): - ''' - Contains base information for a file on the source USB key, - initialised with expected src and dest path - ''' self.src_path = src_path self.dst_path = dst_path self.log_details = {'filepath': self.src_path} self.log_string = '' - a, self.extension = os.path.splitext(self.src_path) - self._get_mimetype() + _, self.extension = os.path.splitext(self.src_path) + self._determine_mimetype() - def _get_mimetype(self): + def _determine_mimetype(self): if os.path.islink(self.src_path): # magic will throw an IOError on a broken symlink self.mimetype = 'inode/symlink' @@ -55,7 +63,6 @@ class FileBase(object): self.mimetype = mt.decode("utf-8") except: self.mimetype = mt - if self.mimetype and '/' in self.mimetype: self.main_type, self.sub_type = self.mimetype.split('/') else: @@ -63,40 +70,53 @@ class FileBase(object): self.sub_type = '' def has_mimetype(self): + """ + Returns True if file has a full mimetype, else False. + + Returns False + updates log if self.main_type or self.sub_type + are not set. + """ + if not self.main_type or not self.sub_type: self.log_details.update({'broken_mime': True}) return False return True def has_extension(self): + """ + Returns True if self.extension is set, else False. + + Returns False + updates self.log_details if self.extension is not set. + """ if not self.extension: self.log_details.update({'no_extension': True}) return False return True def is_dangerous(self): + """Returns True if self.log_details contains 'dangerous'.""" if self.log_details.get('dangerous'): return True return False def is_symlink(self): + """Returns True and updates log if file is a symlink.""" if self.has_mimetype() and self.main_type == 'inode' and self.sub_type == 'symlink': self.log_details.update({'symlink': os.readlink(self.src_path)}) return True return False def add_log_details(self, key, value): - ''' - Add an entry in the log dictionary - ''' + """Takes a key + a value and adds them to self.log_details.""" self.log_details[key] = value def make_dangerous(self): - ''' - This file should be considered as dangerous and never run. - Prepending and appending DANGEROUS to the destination - file name avoid double-click of death - ''' + """ + Marks a file as dangerous. + + Prepends and appends DANGEROUS to the destination file name + to avoid double-click of death. + """ if self.is_dangerous(): # Already marked as dangerous, do nothing return @@ -105,11 +125,7 @@ class FileBase(object): self.dst_path = os.path.join(path, 'DANGEROUS_{}_DANGEROUS'.format(filename)) def make_unknown(self): - ''' - This file has an unknown type and it was not possible to take - a decision. Theuser will have to decide what to do. - Prepending UNKNOWN - ''' + """Marks a file as an unknown type and prepends UNKNOWN to filename.""" if self.is_dangerous() or self.log_details.get('binary'): # Already marked as dangerous or binary, do nothing return @@ -118,11 +134,7 @@ class FileBase(object): self.dst_path = os.path.join(path, 'UNKNOWN_{}'.format(filename)) def make_binary(self): - ''' - This file is a binary, and should probably not be run. - Appending .bin avoir double click of death but the user - will have to decide by itself. - ''' + """Marks a file as a binary and appends .bin to filename.""" if self.is_dangerous(): # Already marked as dangerous, do nothing return @@ -131,6 +143,7 @@ class FileBase(object): self.dst_path = os.path.join(path, '{}.bin'.format(filename)) def force_ext(self, ext): + """If dst_path does not end in ext, appends the ext and updates log.""" if not self.dst_path.endswith(ext): self.log_details['force_ext'] = True self.dst_path += ext From 27d15254f430392890c6bda86caa9090c7d57a70 Mon Sep 17 00:00:00 2001 From: Dan Puttick Date: Tue, 6 Dec 2016 12:43:28 -0500 Subject: [PATCH 3/8] Added/changed docstrings for KittenGroomerBase --- kittengroomer/helpers.py | 49 ++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py index f01a073..dec9d55 100644 --- a/kittengroomer/helpers.py +++ b/kittengroomer/helpers.py @@ -34,13 +34,13 @@ class ImplementationRequired(KittenGroomerError): class FileBase(object): """ - Base object for individual files in the source directory. Has information - about the file as attributes and various helper methods. Initialised with - the source path and expected destination path. Subclass and add attributes - or methods relevant to a given implementation." + Base object for individual files in the source directory. Contains file + attributes and various helper methods. Subclass and add attributes + or methods relevant to a given implementation. """ def __init__(self, src_path, dst_path): + """Initialized with the source path and expected destination path.""" self.src_path = src_path self.dst_path = dst_path self.log_details = {'filepath': self.src_path} @@ -150,11 +150,10 @@ class FileBase(object): class KittenGroomerBase(object): + """Base object responsible for copy/sanitization process.""" def __init__(self, root_src, root_dst, debug=False): - ''' - Setup the base options of the copy/convert setup - ''' + """Initialized with path to source and dest directories.""" self.src_root_dir = root_src self.dst_root_dir = root_dst self.log_root_dir = os.path.join(self.dst_root_dir, 'logs') @@ -166,8 +165,8 @@ class KittenGroomerBase(object): quick_setup(file=self.log_processing) self.log_name = log.name('files') - self.ressources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') - os.environ["PATH"] += os.pathsep + self.ressources_path + self.resources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') + os.environ["PATH"] += os.pathsep + self.resources_path self.cur_file = None @@ -180,6 +179,7 @@ class KittenGroomerBase(object): self.log_debug_out = os.devnull def _computehash(self, path): + """Returns a sha1 hash of a file at a given path.""" s = hashlib.sha1() with open(path, 'rb') as f: while True: @@ -190,6 +190,7 @@ class KittenGroomerBase(object): return s.hexdigest() def tree(self, base_dir, padding=' '): + """Writes a graphical tree to the log for a given directory.""" if sys.version_info.major == 2: self.__tree_py2(base_dir, padding) else: @@ -227,22 +228,22 @@ class KittenGroomerBase(object): # ##### Helpers ##### def _safe_rmtree(self, directory): - '''Remove a directory tree if it exists''' + """Remove a directory tree if it exists.""" if os.path.exists(directory): shutil.rmtree(directory) def _safe_remove(self, filepath): - '''Remove a file if it exists''' + """Remove a file if it exists.""" if os.path.exists(filepath): os.remove(filepath) def _safe_mkdir(self, directory): - '''Make a directory if it does not exist''' + """Make a directory if it does not exist.""" if not os.path.exists(directory): os.makedirs(directory) def _safe_copy(self, src=None, dst=None): - ''' Copy a file and create directory if needed''' + """Copy a file and create directory if needed.""" if src is None: src = self.cur_file.src_path if dst is None: @@ -258,7 +259,7 @@ class KittenGroomerBase(object): return False def _safe_metadata_split(self, ext): - '''Create a separate file to hold this file's metadata''' + """Create a separate file to hold this file's metadata.""" dst = self.cur_file.dst_path try: if os.path.exists(self.cur_file.src_path + ext): @@ -274,31 +275,31 @@ class KittenGroomerBase(object): return False def _list_all_files(self, directory): - ''' Generate an iterator over all the files in a directory tree''' + """Generate an iterator over all the files in a directory tree.""" for root, dirs, files in os.walk(directory): for filename in files: filepath = os.path.join(root, filename) yield filepath def _print_log(self): - ''' - Print log, should be called after each file. + """ + Print log, should be called after each file. - You probably want to reimplement it in the subclass - ''' + You probably want to reimplement it in the subclass. + """ tmp_log = self.log_name.fields(**self.cur_file.log_details) tmp_log.info('It did a thing.') ####################### def processdir(self, src_dir=None, dst_dir=None): - ''' - Main function doing the work, you have to implement it yourself. - ''' - raise ImplementationRequired('You have to implement the result processdir.') + """ + Implement this function in your subclass to define file processing behavior. + """ + raise ImplementationRequired('Please implement processdir.') -def main(kg_implementation, description='Call the KittenGroomer implementation to do things on files present in the source directory to the destination directory'): +def main(kg_implementation, description='Call a KittenGroomer implementation to process files present in the source directory and copy them to the destination directory.'): parser = argparse.ArgumentParser(prog='KittenGroomer', description=description) parser.add_argument('-s', '--source', type=str, help='Source directory') parser.add_argument('-d', '--destination', type=str, help='Destination directory') From 52f56581d13d1f6a6aa613a22d3f6dd49f239b7d Mon Sep 17 00:00:00 2001 From: Dan Puttick Date: Mon, 5 Dec 2016 11:19:39 -0500 Subject: [PATCH 4/8] Skeleton of unit tests using pytest --- tests/test_helpers.py | 98 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 tests/test_helpers.py diff --git a/tests/test_helpers.py b/tests/test_helpers.py new file mode 100644 index 0000000..4597465 --- /dev/null +++ b/tests/test_helpers.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock + +from kittengroomer import FileBase +from kittengroomer import KittenGroomerBase + +### FileBase + +class TestFileBase: + + @pytest.fixture + def empty_filebase(self): + return FileBase('tests/src_simple/blah.conf', '/tests/dst') + + # How do we mock various things that can go wrong with file paths? + # What should the object do if it's given a path that isn't a file? + # We should probably catch everytime that happens and tell the user. + + def test_mimetypes(self): + # the various possible mimetypes including broken ones need to be checked for the right behavior + pass + + + def test_init(self, empty_filebase): + # src_path or dest_path could be invalid + # log could be not created + # what happens if we try to make a new log? + # split extension should handle various weird paths as well + pass + + + def test_dangerous(self): + # given a FileBase object marked as dangerous, should do nothing + # given a FileBase object marked as all other things, should mark as dangerous + # Should work regardless of weird paths?? + pass + + + def test_has_symlink(self): + # given a FileBase object initialized on a symlink, should identify it as a symlink + # given a FileBase object initialized as not a symlink, shouldn't do anything + pass + + + def test_make_unknown(self): + # given a FileBase object with no marking, should do the right things + # given a FileBase object marked unknown, should do nothing + # given a FileBase object marked dangerous, should do nothing + # given a FileBase object with an unrecognized marking, should ??? + pass + + + def test_make_binary(self): + # same as above but for binary + pass + + + def test_force_ext(self): + # should make a file's extension change + # shouldn't change a file's extension if it already is right + # should be able to handle weird paths and filetypes + pass + + +class TestKittenGroomerBase: + + def test_instantiation(self): + # src_path and dest_path + # what if the log file already exists? + # we should probably protect access to self.current_file in some way? + pass + + + def test_computehash(self): + # what are the ways this could go wrong? Should give the same hash every time? + # what is buf doing here? + pass + + + def test_safe_copy(self): + #check that it handles weird file path inputs + pass + + + def test_safe_metadata_split(self): + # if metadata file already exists + # if there is no metadata to write should this work? + # check that returned file is writable? + pass + + + def test_list_all_files(self): + # various possible types of directories + # invalid directory + pass From 196a9fcac12694431fb4810e2f54c8595ff56a75 Mon Sep 17 00:00:00 2001 From: Dan Puttick Date: Thu, 8 Dec 2016 20:12:08 -0500 Subject: [PATCH 5/8] Added unit tests for FileBase --- tests/test_helpers.py | 215 +++++++++++++++++++++++++++++++++++------- tox.ini | 5 + 2 files changed, 185 insertions(+), 35 deletions(-) create mode 100644 tox.ini diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 4597465..c32c7ce 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -1,72 +1,217 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import os +import sys +import tempfile + import pytest -from unittest.mock import Mock from kittengroomer import FileBase from kittengroomer import KittenGroomerBase +PY3 = sys.version_info.major == 3 +skip = pytest.mark.skip +xfail = pytest.mark.xfail +fixture = pytest.fixture + + ### FileBase class TestFileBase: - @pytest.fixture - def empty_filebase(self): - return FileBase('tests/src_simple/blah.conf', '/tests/dst') + @fixture + def source_file(self): + return 'tests/src_simple/blah.conf' - # How do we mock various things that can go wrong with file paths? - # What should the object do if it's given a path that isn't a file? - # We should probably catch everytime that happens and tell the user. + @fixture + def dest_file(self): + return 'tests/dst/blah.conf' - def test_mimetypes(self): - # the various possible mimetypes including broken ones need to be checked for the right behavior - pass + @fixture + def generic_conf_file(self, source_file, dest_file): + return FileBase(source_file, dest_file) + @fixture + def symlink(self, tmpdir): + file_path = tmpdir.join('test.txt') + file_path.write('testing') + file_path = file_path.strpath + symlink_path = tmpdir.join('symlinked.txt') + symlink_path = symlink_path.strpath + file_symlink = os.symlink(file_path, symlink_path) + return FileBase(symlink_path, symlink_path) - def test_init(self, empty_filebase): - # src_path or dest_path could be invalid - # log could be not created - # what happens if we try to make a new log? - # split extension should handle various weird paths as well - pass + @fixture + def temp_file(self, tmpdir): + file_path = tmpdir.join('test.txt') + file_path.write('testing') + file_path = file_path.strpath + return FileBase(file_path, file_path) + @fixture + def temp_file_no_ext(self, tmpdir): + file_path = tmpdir.join('test') + file_path.write('testing') + file_path = file_path.strpath + return FileBase(file_path, file_path) - def test_dangerous(self): - # given a FileBase object marked as dangerous, should do nothing - # given a FileBase object marked as all other things, should mark as dangerous + @fixture + def file_marked_dangerous(self, generic_conf_file): + generic_conf_file.make_dangerous() + return file + + @fixture + def file_marked_unknown(self, generic_conf_file): + generic_conf_file.make_unknown() + return file + + @fixture + def file_marked_binary(self, generic_conf_file): + generic_conf_file.mark_binary() + return file + + @fixture(params=[ + FileBase.make_dangerous, + FileBase.make_unknown, + FileBase.make_binary + ]) + def file_marked_all_parameterized(self, request, generic_conf_file): + request.param(generic_conf_file) + return generic_conf_file + + # What are the various things that can go wrong with file paths? We should have fixtures for them + # What should the object do if it's given a path that isn't a file? Currently magic throws an exception + # We should probably catch everytime that happens and tell the user what happened (and maybe put it in the log) + + def test_create(self): + file = FileBase('tests/src_simple/blah.conf', '/tests/dst/blah.conf') + + def test_create_broken(self, tmpdir): + with pytest.raises(TypeError): + file_no_args = FileBase() + if PY3: + with pytest.raises(FileNotFoundError): + file_empty_args = FileBase('', '') + else: + with pytest.raises(IOError): + file_empty_args = FileBase('', '') + if PY3: + with pytest.raises(IsADirectoryError): + file_directory = FileBase(tmpdir.strpath, tmpdir.strpath) + else: + with pytest.raises(IOError): + file_directory = FileBase(tmpdir.strpath, tmpdir.strpath) + # are there other cases here? path to a file that doesn't exist? permissions? + + def test_init(self, generic_conf_file): + file = generic_conf_file + assert file.log_details + assert file.log_details['filepath'] == file.src_path + assert file.extension == '.conf' + copied_log = file.log_details.copy() + file.log_details = '' + # assert file.log_details == copied_log # this fails for now, we need to make log_details undeletable + # we should probably check for more extensions here + + def test_mimetypes(self, generic_conf_file): + assert generic_conf_file.has_mimetype() + assert generic_conf_file.mimetype == 'text/plain' + assert generic_conf_file.main_type == 'text' + assert generic_conf_file.sub_type == 'plain' + # FileBase(source_path) + # this is essentially testing the behavior of magic. I guess for now it's ok if we just test for some basic file types? + + def test_has_extension(self, temp_file, temp_file_no_ext): + assert temp_file.has_extension() == True + assert temp_file_no_ext.has_extension() == False + assert temp_file_no_ext.log_details.get('no_extension') == True + + def test_marked_dangerous(self, file_marked_all_parameterized): + file_marked_all_parameterized.make_dangerous() + assert file_marked_all_parameterized.is_dangerous() == True # Should work regardless of weird paths?? - pass + # Should check file path alteration behavior as well + def test_generic_dangerous(self, generic_conf_file): + assert generic_conf_file.is_dangerous() == False + generic_conf_file.make_dangerous() + assert generic_conf_file.is_dangerous() == True - def test_has_symlink(self): - # given a FileBase object initialized on a symlink, should identify it as a symlink - # given a FileBase object initialized as not a symlink, shouldn't do anything - pass + def test_has_symlink(self, tmpdir): + file_path = tmpdir.join('test.txt') + file_path.write('testing') + file_path = file_path.strpath + symlink_path = tmpdir.join('symlinked.txt') + symlink_path = symlink_path.strpath + file_symlink = os.symlink(file_path, symlink_path) + file = FileBase(file_path, file_path) + symlink = FileBase(symlink_path, symlink_path) + assert file.is_symlink() == False + assert symlink.is_symlink() == True + def test_has_symlink_fixture(self, symlink): + assert symlink.is_symlink() == True - def test_make_unknown(self): + def test_generic_make_unknown(self, generic_conf_file): + assert generic_conf_file.log_details.get('unknown') == None + generic_conf_file.make_unknown() + assert generic_conf_file.log_details.get('unknown') == True # given a FileBase object with no marking, should do the right things - # given a FileBase object marked unknown, should do nothing - # given a FileBase object marked dangerous, should do nothing + + def test_marked_make_unknown(self, file_marked_all_parameterized): + file = file_marked_all_parameterized + if file.log_details.get('unknown'): + file.make_unknown() + assert file.log_details.get('unknown') == True + else: + assert file.log_details.get('unknown') == None + file.make_unknown() + assert file.log_details.get('unknown') == None # given a FileBase object with an unrecognized marking, should ??? - pass + def test_generic_make_binary(self, generic_conf_file): + assert generic_conf_file.log_details.get('binary') == None + generic_conf_file.make_binary() + assert generic_conf_file.log_details.get('binary') == True - def test_make_binary(self): - # same as above but for binary - pass + def test_marked_make_binary(self, file_marked_all_parameterized): + file = file_marked_all_parameterized + if file.log_details.get('dangerous'): + file.make_binary() + assert file.log_details.get('binary') == None + else: + file.make_binary() + assert file.log_details.get('binary') == True - - def test_force_ext(self): + def test_force_ext_change(self, generic_conf_file): + assert generic_conf_file.has_extension() + assert generic_conf_file.extension == '.conf' + assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf' + generic_conf_file.force_ext('.txt') + assert os.path.splitext(generic_conf_file.dst_path)[1] == '.txt' + assert generic_conf_file.log_details.get('force_ext') == True # should make a file's extension change + # should be able to handle weird paths + + def test_force_ext_correct(self, generic_conf_file): + assert generic_conf_file.has_extension() + assert generic_conf_file.extension == '.conf' + generic_conf_file.force_ext('.conf') + assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf' + assert generic_conf_file.log_details.get('force_ext') == None # shouldn't change a file's extension if it already is right - # should be able to handle weird paths and filetypes - pass class TestKittenGroomerBase: + @fixture + def generic_groomer(self): + return KittenGroomerBase('tests/src_complex', 'tests/dst') + + def test_create(self, generic_groomer): + assert generic_groomer + def test_instantiation(self): # src_path and dest_path # what if the log file already exists? diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..da1ed31 --- /dev/null +++ b/tox.ini @@ -0,0 +1,5 @@ +[tox] +envlist=py27,py35 +[testenv] +deps=-rdev-requirements.txt +commands= pytest tests/test_helpers.py --cov=kittengroomer From 0364e038e1893a3c6c92a8f2527baf23691bc423 Mon Sep 17 00:00:00 2001 From: Dan Puttick Date: Fri, 9 Dec 2016 16:13:54 -0500 Subject: [PATCH 6/8] Added unit tests for KittenGroomerBase --- bin/generic.py | 2 +- kittengroomer/helpers.py | 2 +- tests/test_helpers.py | 160 +++++++++++++++++++++++++-------------- 3 files changed, 107 insertions(+), 57 deletions(-) diff --git a/bin/generic.py b/bin/generic.py index d368755..e76fccd 100644 --- a/bin/generic.py +++ b/bin/generic.py @@ -258,7 +258,7 @@ class KittenGroomer(KittenGroomerBase): self._safe_mkdir(tmpdir) # The magic comes from here: http://svn.ghostscript.com/ghostscript/trunk/gs/doc/Ps2pdf.htm#PDFA curdir = os.getcwd() - os.chdir(self.ressources_path) + os.chdir(self.resources_path) gs_command = '{} -dPDFA -dQUIET -dSAFER -dBATCH -dNOPAUSE -dNOOUTERSAVE -sProcessColorModel=DeviceCMYK -sDEVICE=pdfwrite -sPDFACompatibilityPolicy=1 -sOutputFile="{}" ./PDFA_def.ps "{}"'.format( GS, os.path.join(curdir, tmppath), os.path.join(curdir, self.cur_file.src_path)) self._run_process(gs_command) diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py index dec9d55..990f899 100644 --- a/kittengroomer/helpers.py +++ b/kittengroomer/helpers.py @@ -262,7 +262,7 @@ class KittenGroomerBase(object): """Create a separate file to hold this file's metadata.""" dst = self.cur_file.dst_path try: - if os.path.exists(self.cur_file.src_path + ext): + if os.path.exists(self.cur_file.src_path + ext): # should we check dst_path as well? raise KittenGroomerError("Cannot create split metadata file for \"" + self.cur_file.dst_path + "\", type '" + ext + "': File exists.") diff --git a/tests/test_helpers.py b/tests/test_helpers.py index c32c7ce..a14510a 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -3,12 +3,11 @@ import os import sys -import tempfile import pytest -from kittengroomer import FileBase -from kittengroomer import KittenGroomerBase +from kittengroomer import FileBase, KittenGroomerBase +from kittengroomer.helpers import ImplementationRequired PY3 = sys.version_info.major == 3 skip = pytest.mark.skip @@ -16,7 +15,7 @@ xfail = pytest.mark.xfail fixture = pytest.fixture -### FileBase +# FileBase class TestFileBase: @@ -39,7 +38,7 @@ class TestFileBase: file_path = file_path.strpath symlink_path = tmpdir.join('symlinked.txt') symlink_path = symlink_path.strpath - file_symlink = os.symlink(file_path, symlink_path) + os.symlink(file_path, symlink_path) return FileBase(symlink_path, symlink_path) @fixture @@ -59,17 +58,17 @@ class TestFileBase: @fixture def file_marked_dangerous(self, generic_conf_file): generic_conf_file.make_dangerous() - return file + return generic_conf_file @fixture def file_marked_unknown(self, generic_conf_file): generic_conf_file.make_unknown() - return file + return generic_conf_file @fixture def file_marked_binary(self, generic_conf_file): generic_conf_file.mark_binary() - return file + return generic_conf_file @fixture(params=[ FileBase.make_dangerous, @@ -81,8 +80,8 @@ class TestFileBase: return generic_conf_file # What are the various things that can go wrong with file paths? We should have fixtures for them - # What should the object do if it's given a path that isn't a file? Currently magic throws an exception - # We should probably catch everytime that happens and tell the user what happened (and maybe put it in the log) + # What should FileBase do if it's given a path that isn't a file (doesn't exist or is a dir)? Currently magic throws an exception + # We should probably catch everytime that happens and tell the user explicitly happened (and maybe put it in the log) def test_create(self): file = FileBase('tests/src_simple/blah.conf', '/tests/dst/blah.conf') @@ -119,24 +118,39 @@ class TestFileBase: assert generic_conf_file.mimetype == 'text/plain' assert generic_conf_file.main_type == 'text' assert generic_conf_file.sub_type == 'plain' - # FileBase(source_path) - # this is essentially testing the behavior of magic. I guess for now it's ok if we just test for some basic file types? + # Need to test something without a mimetype + # Need to test something that's a directory + # Need to test something that causes the unicode exception + + def test_has_mimetype_no_main_type(self, generic_conf_file): + generic_conf_file.main_type = '' + assert generic_conf_file.has_mimetype() is False + + def test_has_mimetype_no_sub_type(self, generic_conf_file): + generic_conf_file.sub_type = '' + assert generic_conf_file.has_mimetype() is False def test_has_extension(self, temp_file, temp_file_no_ext): - assert temp_file.has_extension() == True - assert temp_file_no_ext.has_extension() == False - assert temp_file_no_ext.log_details.get('no_extension') == True + assert temp_file.has_extension() is True + assert temp_file_no_ext.has_extension() is False + assert temp_file_no_ext.log_details.get('no_extension') is True + + def test_add_log_details(self, generic_conf_file): + generic_conf_file.add_log_details('test', True) + assert generic_conf_file.log_details['test'] is True + with pytest.raises(KeyError): + assert generic_conf_file.log_details['wrong'] is False def test_marked_dangerous(self, file_marked_all_parameterized): file_marked_all_parameterized.make_dangerous() - assert file_marked_all_parameterized.is_dangerous() == True + assert file_marked_all_parameterized.is_dangerous() is True # Should work regardless of weird paths?? # Should check file path alteration behavior as well def test_generic_dangerous(self, generic_conf_file): - assert generic_conf_file.is_dangerous() == False + assert generic_conf_file.is_dangerous() is False generic_conf_file.make_dangerous() - assert generic_conf_file.is_dangerous() == True + assert generic_conf_file.is_dangerous() is True def test_has_symlink(self, tmpdir): file_path = tmpdir.join('test.txt') @@ -147,42 +161,42 @@ class TestFileBase: file_symlink = os.symlink(file_path, symlink_path) file = FileBase(file_path, file_path) symlink = FileBase(symlink_path, symlink_path) - assert file.is_symlink() == False - assert symlink.is_symlink() == True + assert file.is_symlink() is False + assert symlink.is_symlink() is True def test_has_symlink_fixture(self, symlink): - assert symlink.is_symlink() == True + assert symlink.is_symlink() is True def test_generic_make_unknown(self, generic_conf_file): - assert generic_conf_file.log_details.get('unknown') == None + assert generic_conf_file.log_details.get('unknown') is None generic_conf_file.make_unknown() - assert generic_conf_file.log_details.get('unknown') == True + assert generic_conf_file.log_details.get('unknown') is True # given a FileBase object with no marking, should do the right things def test_marked_make_unknown(self, file_marked_all_parameterized): file = file_marked_all_parameterized if file.log_details.get('unknown'): file.make_unknown() - assert file.log_details.get('unknown') == True + assert file.log_details.get('unknown') is True else: - assert file.log_details.get('unknown') == None + assert file.log_details.get('unknown') is None file.make_unknown() - assert file.log_details.get('unknown') == None + assert file.log_details.get('unknown') is None # given a FileBase object with an unrecognized marking, should ??? def test_generic_make_binary(self, generic_conf_file): - assert generic_conf_file.log_details.get('binary') == None + assert generic_conf_file.log_details.get('binary') is None generic_conf_file.make_binary() - assert generic_conf_file.log_details.get('binary') == True + assert generic_conf_file.log_details.get('binary') is True def test_marked_make_binary(self, file_marked_all_parameterized): file = file_marked_all_parameterized if file.log_details.get('dangerous'): file.make_binary() - assert file.log_details.get('binary') == None + assert file.log_details.get('binary') is None else: file.make_binary() - assert file.log_details.get('binary') == True + assert file.log_details.get('binary') is True def test_force_ext_change(self, generic_conf_file): assert generic_conf_file.has_extension() @@ -190,7 +204,7 @@ class TestFileBase: assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf' generic_conf_file.force_ext('.txt') assert os.path.splitext(generic_conf_file.dst_path)[1] == '.txt' - assert generic_conf_file.log_details.get('force_ext') == True + assert generic_conf_file.log_details.get('force_ext') is True # should make a file's extension change # should be able to handle weird paths @@ -199,45 +213,81 @@ class TestFileBase: assert generic_conf_file.extension == '.conf' generic_conf_file.force_ext('.conf') assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf' - assert generic_conf_file.log_details.get('force_ext') == None + assert generic_conf_file.log_details.get('force_ext') is None # shouldn't change a file's extension if it already is right class TestKittenGroomerBase: - + @fixture - def generic_groomer(self): - return KittenGroomerBase('tests/src_complex', 'tests/dst') + def source_directory(self): + return 'tests/src_complex' + + @fixture + def dest_directory(self): + return 'tests/dst' + + @fixture + def generic_groomer(self, source_directory, dest_directory): + return KittenGroomerBase(source_directory, dest_directory) def test_create(self, generic_groomer): assert generic_groomer - def test_instantiation(self): - # src_path and dest_path - # what if the log file already exists? - # we should probably protect access to self.current_file in some way? - pass + def test_instantiation(self, source_directory, dest_directory): + groomer = KittenGroomerBase(source_directory, dest_directory) + debug_groomer = KittenGroomerBase(source_directory, + dest_directory, + debug=True) + # we should maybe protect access to self.current_file in some way? + def test_computehash(self, tmpdir): + file = tmpdir.join('test.txt') + file.write('testing') + simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath) + simple_groomer._computehash(file.strpath) - def test_computehash(self): - # what are the ways this could go wrong? Should give the same hash every time? - # what is buf doing here? - pass + def test_tree(self, generic_groomer): + generic_groomer.tree(generic_groomer.src_root_dir) - - def test_safe_copy(self): + def test_safe_copy(self, tmpdir): + file = tmpdir.join('test.txt') + file.write('testing') + testdir = tmpdir.join('testdir') + os.mkdir(testdir.strpath) + filedest = testdir.join('test.txt') + simple_groomer = KittenGroomerBase(tmpdir.strpath, testdir.strpath) + simple_groomer.cur_file = FileBase(file.strpath, filedest.strpath) + assert simple_groomer._safe_copy() is True #check that it handles weird file path inputs - pass - - def test_safe_metadata_split(self): + def test_safe_metadata_split(self, tmpdir): + file = tmpdir.join('test.txt') + file.write('testing') + simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath) + simple_groomer.cur_file = FileBase(file.strpath, file.strpath) + metadata_file = simple_groomer._safe_metadata_split('metadata.log') + metadata_file.write('Have some metadata!') + metadata_file.close() + assert simple_groomer._safe_metadata_split('') is False # if metadata file already exists # if there is no metadata to write should this work? - # check that returned file is writable? - pass + def test_list_all_files(self, tmpdir): + file = tmpdir.join('test.txt') + file.write('testing') + testdir = tmpdir.join('testdir') + os.mkdir(testdir.strpath) + simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath) + files = simple_groomer._list_all_files(simple_groomer.src_root_dir) + assert file.strpath in files + assert testdir.strpath not in files - def test_list_all_files(self): - # various possible types of directories - # invalid directory - pass + def test_print_log(self, generic_groomer): + with pytest.raises(AttributeError): + generic_groomer._print_log() + # Kind of a bad test, but this should be implemented by the user anyway + + def test_processdir(self, generic_groomer): + with pytest.raises(ImplementationRequired): + generic_groomer.processdir() From 4f851435e513d5ccf1a3882cbd56977063e06b5f Mon Sep 17 00:00:00 2001 From: Dan Puttick Date: Fri, 9 Dec 2016 11:41:19 -0500 Subject: [PATCH 7/8] Updated path traversal link, changed pip installs --- .travis.yml | 50 ++++++++++++++++++++++++------------------------ bin/filecheck.py | 2 ++ 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/.travis.yml b/.travis.yml index b3d88a4..aa24bbd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,15 @@ language: python python: - - "2.7_with_system_site_packages" - - "3.3" - - "3.4" - - "3.5" - - "nightly" + - 2.7_with_system_site_packages + - 2.7.11 + # - 3.3 + # - 3.4 + # - 3.5 + # - nightly sudo: required +# do we need sudo? should double check dist: trusty @@ -44,18 +46,16 @@ install: - sudo apt-get install libxml2-dev libxslt1-dev - wget https://didierstevens.com/files/software/pdfid_v0_2_1.zip - unzip pdfid_v0_2_1.zip + - pip install -U pip + - pip install lxml exifread pillow + - pip install git+https://github.com/Rafiot/officedissector.git - | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7_with_system_site_packages" ]]; then - sudo pip install -U pip lxml exifread pillow - sudo pip install -U git+https://github.com/Rafiot/officedissector.git - sudo pip install -U oletools olefile coveralls codecov pytest-cov - else - pip install -U pip lxml exifread pillow - pip install -U git+https://github.com/Rafiot/officedissector.git - pip install -U coveralls codecov pytest-cov + if [[ "$TRAVIS_PYTHON_VERSION" == 2* ]]; then + pip install -U oletools olefile fi # Module dependencies - pip install -r dev-requirements.txt + - pip install coveralls codecov # Testing dependencies - sudo apt-get install rar # Prepare tests @@ -65,18 +65,18 @@ install: - python unpackall.py - popd - mv theZoo/malwares/Binaries/out tests/src_complex/ - # path traversal - # - hg clone https://bitbucket.org/jwilk/path-traversal-samples - # - pushd path-traversal-samples - # - pushd zip - # - make - # - popd - # - pushd rar - # - make - # - popd - # - popd - # - mv path-traversal-samples/zip/*.zip tests/src_complex/ - # - mv path-traversal-samples/rar/*.rar tests/src_complex/ + # Path traversal + - git clone https://github.com/jwilk/path-traversal-samples + - pushd path-traversal-samples + - pushd zip + - make + - popd + - pushd rar + - make + - popd + - popd + - mv path-traversal-samples/zip/*.zip tests/src_complex/ + - mv path-traversal-samples/rar/*.rar tests/src_complex/ # Office docs - git clone https://github.com/eea/odfpy.git - mv odfpy/tests/examples/* tests/src_complex/ diff --git a/bin/filecheck.py b/bin/filecheck.py index f35f4b7..1d3873c 100644 --- a/bin/filecheck.py +++ b/bin/filecheck.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- import os +import sys import mimetypes import shlex import subprocess @@ -21,6 +22,7 @@ from pdfid import PDFiD, cPDFiD from kittengroomer import FileBase, KittenGroomerBase, main SEVENZ = '/usr/bin/7z' +PY3 = sys.version_info.major == 3 # Prepare application/ From 18373e5292397e11af64d8801a949c1aaa7b4575 Mon Sep 17 00:00:00 2001 From: Dan Puttick Date: Tue, 13 Dec 2016 12:36:18 -0500 Subject: [PATCH 8/8] Removed 2.7_with_site_packages from python versions --- .travis.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index aa24bbd..fc6e793 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,11 @@ language: python python: - - 2.7_with_system_site_packages - - 2.7.11 - # - 3.3 - # - 3.4 - # - 3.5 - # - nightly + - 2.7 + - 3.3 + - 3.4 + - 3.5 + - nightly sudo: required # do we need sudo? should double check @@ -50,7 +49,7 @@ install: - pip install lxml exifread pillow - pip install git+https://github.com/Rafiot/officedissector.git - | - if [[ "$TRAVIS_PYTHON_VERSION" == 2* ]]; then + if [[ "$TRAVIS_PYTHON_VERSION" == 2* ]]; then pip install -U oletools olefile fi # Module dependencies