From 27f58a0ede2e3ef7a1c55ffe466a2a5ab73f9d50 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Fri, 17 Mar 2017 15:14:18 -0400
Subject: [PATCH 01/21] Put log dir creation code in separate method

---
 kittengroomer/helpers.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 318f1ca..5feadea 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -272,10 +272,7 @@ class GroomerLogger(object):
 
     def __init__(self, root_dir_path, debug=False):
         self.root_dir = root_dir_path
-        self.log_dir_path = os.path.join(root_dir_path, 'logs')
-        if os.path.exists(self.log_dir_path):
-            shutil.rmtree(self.log_dir_path)
-        os.makedirs(self.log_dir_path)
+        self.log_dir_path = self._make_log_dir(root_dir_path)
         self.log_processing = os.path.join(self.log_dir_path, 'processing.log')
         self.log_content = os.path.join(self.log_dir_path, 'content.log')
         twiggy.quick_setup(file=self.log_processing)
@@ -287,6 +284,13 @@ class GroomerLogger(object):
             self.log_debug_err = os.devnull
             self.log_debug_out = os.devnull
 
+    def _make_log_dir(self, root_dir_path):
+        log_dir_path = os.path.join(root_dir_path, 'logs')
+        if os.path.exists(log_dir_path):
+            shutil.rmtree(log_dir_path)
+        os.makedirs(log_dir_path)
+        return log_dir_path
+
     def tree(self, base_dir, padding='   '):
         """Write a graphical tree to the log for `base_dir`."""
         with open(self.log_content, 'ab') as lf:

From 18857c8cf77fd821dbb95e182da4f2fbcd9064f6 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Fri, 17 Mar 2017 21:00:11 -0400
Subject: [PATCH 02/21] Change names of KittenGroomerBase root dir paths

---
 kittengroomer/helpers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 5feadea..f193cd3 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -326,10 +326,10 @@ class GroomerLogger(object):
 class KittenGroomerBase(object):
     """Base object responsible for copy/sanitization process."""
 
-    def __init__(self, root_src, root_dst, debug=False):
+    def __init__(self, src_root_dir, dst_root_dir, debug=False):
         """Initialized with path to source and dest directories."""
-        self.src_root_dir = root_src
-        self.dst_root_dir = root_dst
+        self.src_root_dir = src_root_dir
+        self.dst_root_dir = dst_root_dir
         self.debug = debug
         self.cur_file = None
         self.logger = GroomerLogger(self.dst_root_dir, debug)

From 51760ebbb1056a002c18b1f9b5ad2984ef514831 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Fri, 17 Mar 2017 22:19:56 -0400
Subject: [PATCH 03/21] Move default log setup back into filecheck

* Realized that the API consumer might want to write their own logging tool.
* FileBase and KittenGroomerBase will have no logging code.
* If the API consumer likes, they can import GroomerLogger and use it in their
implementation.
---
 bin/filecheck.py            | 14 ++++++++++----
 kittengroomer/helpers.py    | 15 +++------------
 tests/logging.py            | 21 ++++++++++-----------
 tests/test_kittengroomer.py |  6 +-----
 4 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index da7e00b..0a9febe 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -16,7 +16,7 @@ from PIL import Image
 # from PIL import PngImagePlugin
 from pdfid import PDFiD, cPDFiD
 
-from kittengroomer import FileBase, KittenGroomerBase
+from kittengroomer import FileBase, KittenGroomerBase, GroomerLogger
 
 
 SEVENZ_PATH = '/usr/bin/7z'
@@ -86,8 +86,9 @@ class Config:
 class File(FileBase):
 
     def __init__(self, src_path, dst_path, logger):
-        super(File, self).__init__(src_path, dst_path, logger)
+        super(File, self).__init__(src_path, dst_path)
         self.is_recursive = False
+        self.logger = logger
 
         subtypes_apps = [
             (Config.mimes_office, self._winoffice),
@@ -170,7 +171,7 @@ class File(FileBase):
 
     def _check_filename(self):
         if self.filename[0] is '.':
-            # handle dotfiles
+            # TODO: handle dotfiles?
             pass
         right_to_left_override = u"\u202E"
         if right_to_left_override in self.filename:
@@ -188,6 +189,9 @@ class File(FileBase):
         if not self.is_dangerous:
             self.mime_processing_options.get(self.main_type, self.unknown)()
 
+    def write_log(self):
+        pass
+
     # ##### Helper functions #####
     def _make_method_dict(self, list_of_tuples):
         """Returns a dictionary with mimetype: method pairs."""
@@ -500,9 +504,11 @@ class File(FileBase):
 class KittenGroomerFileCheck(KittenGroomerBase):
 
     def __init__(self, root_src, root_dst, max_recursive_depth=2, debug=False):
-        super(KittenGroomerFileCheck, self).__init__(root_src, root_dst, debug)
+        super(KittenGroomerFileCheck, self).__init__(root_src, root_dst)
         self.recursive_archive_depth = 0
         self.max_recursive_depth = max_recursive_depth
+        self.cur_file = None
+        self.logger = GroomerLogger(self.dst_root_dir, debug)
 
     def process_dir(self, src_dir, dst_dir):
         """Process a directory on the source key."""
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index f193cd3..5fc5d7a 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -37,7 +37,7 @@ class FileBase(object):
     Contains file attributes and various helper methods.
     """
 
-    def __init__(self, src_path, dst_path, logger=None):
+    def __init__(self, src_path, dst_path):
         """
         Initialized with the source path and expected destination path.
 
@@ -47,7 +47,6 @@ class FileBase(object):
         self.src_path = src_path
         self.dst_path = dst_path
         self.filename = os.path.basename(self.src_path)
-        self.logger = logger
         self._file_props = {
             'filepath': self.src_path,
             'filename': self.filename,
@@ -261,11 +260,6 @@ class FileBase(object):
             self.add_error(e, '')
             return False
 
-    def write_log(self):
-        """Write logs from file to self.logger."""
-        file_log = self.logger.add_file(self)
-        file_log.fields(**self._file_props)
-
 
 class GroomerLogger(object):
     """Groomer logging interface."""
@@ -320,19 +314,16 @@ class GroomerLogger(object):
 
     def add_file(self, file):
         """Add a file to the log."""
-        return self.log.name('file.src_path')
+        pass
 
 
 class KittenGroomerBase(object):
     """Base object responsible for copy/sanitization process."""
 
-    def __init__(self, src_root_dir, dst_root_dir, debug=False):
+    def __init__(self, src_root_dir, dst_root_dir):
         """Initialized with path to source and dest directories."""
         self.src_root_dir = src_root_dir
         self.dst_root_dir = dst_root_dir
-        self.debug = debug
-        self.cur_file = None
-        self.logger = GroomerLogger(self.dst_root_dir, debug)
 
     def safe_rmtree(self, directory):
         """Remove a directory tree if it exists."""
diff --git a/tests/logging.py b/tests/logging.py
index c937a71..0b5237d 100644
--- a/tests/logging.py
+++ b/tests/logging.py
@@ -9,14 +9,13 @@ def save_logs(groomer, test_description):
         with open(groomer.logger.log_processing, 'r') as logfile:
             log = logfile.read()
             test_log.write(log)
-        if groomer.debug:
-            if os.path.exists(groomer.logger.log_debug_err):
-                test_log.write(divider.format('ERR LOG'))
-                with open(groomer.logger.log_debug_err, 'r') as debug_err:
-                    err = debug_err.read()
-                    test_log.write(err)
-            if os.path.exists(groomer.logger.log_debug_out):
-                test_log.write(divider.format('OUT LOG'))
-                with open(groomer.logger.log_debug_out, 'r') as debug_out:
-                    out = debug_out.read()
-                    test_log.write(out)
+        if os.path.exists(groomer.logger.log_debug_err):
+            test_log.write(divider.format('ERR LOG'))
+            with open(groomer.logger.log_debug_err, 'r') as debug_err:
+                err = debug_err.read()
+                test_log.write(err)
+        if os.path.exists(groomer.logger.log_debug_out):
+            test_log.write(divider.format('OUT LOG'))
+            with open(groomer.logger.log_debug_out, 'r') as debug_out:
+                out = debug_out.read()
+                test_log.write(out)
diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py
index 940136d..88500b7 100644
--- a/tests/test_kittengroomer.py
+++ b/tests/test_kittengroomer.py
@@ -6,7 +6,6 @@ import os
 import pytest
 
 from kittengroomer import FileBase, KittenGroomerBase, GroomerLogger
-from kittengroomer.helpers import ImplementationRequired
 
 skip = pytest.mark.skip
 xfail = pytest.mark.xfail
@@ -245,10 +244,7 @@ class TestKittenGroomerBase:
         assert generic_groomer
 
     def test_instantiation(self, source_directory, dest_directory):
-        groomer = KittenGroomerBase(source_directory, dest_directory)
-        debug_groomer = KittenGroomerBase(source_directory,
-                                          dest_directory,
-                                          debug=True)
+        KittenGroomerBase(source_directory, dest_directory)
 
     def test_list_all_files(self, tmpdir):
         file = tmpdir.join('test.txt')

From 685146175529f59177dd1d1e83c72c548d1a5232 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Sat, 18 Mar 2017 00:10:17 -0400
Subject: [PATCH 04/21] Change from two separate logs to one

---
 bin/filecheck.py         |  7 +++----
 kittengroomer/helpers.py | 20 +++++++++++---------
 tests/logging.py         |  2 +-
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 0a9febe..a6fe282 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -190,7 +190,8 @@ class File(FileBase):
             self.mime_processing_options.get(self.main_type, self.unknown)()
 
     def write_log(self):
-        pass
+        props = self.get_all_props()
+        self.logger.add_file(props)
 
     # ##### Helper functions #####
     def _make_method_dict(self, list_of_tuples):
@@ -441,8 +442,7 @@ class File(FileBase):
             # LOG: handle metadata
             self.set_property('metadata', 'png')
             img.close()
-        # Catch decompression bombs
-        except Exception as e:
+        except Exception as e:  # Catch decompression bombs
             # TODO: only catch DecompressionBombWarnings here?
             self.add_error(e, "Caught exception processing metadata for {}".format(self.src_path))
             self.make_dangerous('exception processing metadata')
@@ -548,7 +548,6 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         to an archive.
         """
         self.recursive_archive_depth += 1
-        # LOG: write_log or somehow log the archive file here
         if self.recursive_archive_depth >= self.max_recursive_depth:
             file.make_dangerous('Archive bomb')
         else:
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 5fc5d7a..85696a8 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -14,7 +14,7 @@ import shutil
 import argparse
 
 import magic
-import twiggy
+# import twiggy
 
 
 class KittenGroomerError(Exception):
@@ -41,7 +41,6 @@ class FileBase(object):
         """
         Initialized with the source path and expected destination path.
 
-        self.logger should be a logging object with an add_file method.
         Create various properties and determine the file's mimetype.
         """
         self.src_path = src_path
@@ -181,6 +180,10 @@ class FileBase(object):
         else:
             return None
 
+    def get_all_props(self):
+        """Return a dict containing all stored properties of this file."""
+        return self._file_props
+
     def add_error(self, error, info):
         """Add an error: info pair to _file_props['errors']."""
         self._file_props['errors'].update({error: info})
@@ -267,10 +270,9 @@ class GroomerLogger(object):
     def __init__(self, root_dir_path, debug=False):
         self.root_dir = root_dir_path
         self.log_dir_path = self._make_log_dir(root_dir_path)
-        self.log_processing = os.path.join(self.log_dir_path, 'processing.log')
-        self.log_content = os.path.join(self.log_dir_path, 'content.log')
-        twiggy.quick_setup(file=self.log_processing)
-        self.log = twiggy.log.name('files')
+        self.log_path = os.path.join(self.log_dir_path, 'log.txt')
+        # twiggy.quick_setup(file=self.log_processing)
+        # self.log = twiggy.log.name('files')
         if debug:
             self.log_debug_err = os.path.join(self.log_dir_path, 'debug_stderr.log')
             self.log_debug_out = os.path.join(self.log_dir_path, 'debug_stdout.log')
@@ -287,7 +289,7 @@ class GroomerLogger(object):
 
     def tree(self, base_dir, padding='   '):
         """Write a graphical tree to the log for `base_dir`."""
-        with open(self.log_content, 'ab') as lf:
+        with open(self.log_path, 'ab') as lf:
             lf.write(bytes('#' * 80 + '\n', 'UTF-8'))
             lf.write(bytes('{}+- {}/\n'.format(padding, os.path.basename(os.path.abspath(base_dir)).encode()), 'utf8'))
             padding += '|  '
@@ -312,8 +314,8 @@ class GroomerLogger(object):
                 s.update(buf)
         return s.hexdigest()
 
-    def add_file(self, file):
-        """Add a file to the log."""
+    def add_file(self, file_props):
+        """Add a file to the log. Takes a dict of file properties."""
         pass
 
 
diff --git a/tests/logging.py b/tests/logging.py
index 0b5237d..c06ad94 100644
--- a/tests/logging.py
+++ b/tests/logging.py
@@ -6,7 +6,7 @@ def save_logs(groomer, test_description):
     test_log_path = 'tests/test_logs/{}.log'.format(test_description)
     with open(test_log_path, 'w+') as test_log:
         test_log.write(divider.format('TEST LOG'))
-        with open(groomer.logger.log_processing, 'r') as logfile:
+        with open(groomer.logger.log_path, 'r') as logfile:
             log = logfile.read()
             test_log.write(log)
         if os.path.exists(groomer.logger.log_debug_err):

From 3e7b38c5d450527429e1e637bcdda2ce8c61be7c Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 20 Mar 2017 19:39:37 -0400
Subject: [PATCH 05/21] Improve doc strings on FileBase

---
 bin/filecheck.py            |  4 +-
 kittengroomer/helpers.py    | 99 +++++++++++++++++++++----------------
 tests/test_kittengroomer.py |  8 ++-
 3 files changed, 62 insertions(+), 49 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index a6fe282..474506f 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -508,7 +508,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         self.recursive_archive_depth = 0
         self.max_recursive_depth = max_recursive_depth
         self.cur_file = None
-        self.logger = GroomerLogger(self.dst_root_dir, debug)
+        self.logger = GroomerLogger(self.dst_root_path, debug)
 
     def process_dir(self, src_dir, dst_dir):
         """Process a directory on the source key."""
@@ -573,7 +573,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         return True
 
     def run(self):
-        self.process_dir(self.src_root_dir, self.dst_root_dir)
+        self.process_dir(self.src_root_path, self.dst_root_path)
 
 
 def main(kg_implementation, description):
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 85696a8..119d008 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -159,24 +159,27 @@ class FileBase(object):
 
     def set_property(self, prop_string, value):
         """
-        Take a property and a value and add them to self._file_props.
+        Take a property and a value and add them to the file's property dict.
 
-        If prop_string is already in _file_props, set prop_string to value.
-        If prop_string not in _file_props, set prop_string to value in
-        _file_props['user_defined'].
+        If `prop_string` is part of the file property API, set it to `value`.
+        Otherwise, add `prop_string`: `value` to `user_defined` properties.
         """
         if prop_string in self._file_props.keys():
             self._file_props[prop_string] = value
         else:
             self._file_props['user_defined'][prop_string] = value
 
-    def get_property(self, file_prop):
-        """Get the value for a property in _file_props."""
+    def get_property(self, prop_string):
+        """
+        Get the value for a property stored on the file.
+
+        Returns `None` if `prop_string` cannot be found on the file.
+        """
         # TODO: could probably be refactored
-        if file_prop in self._file_props:
-            return self._file_props[file_prop]
-        elif file_prop in self._file_props['user_defined']:
-            return self._file_props['user_defined'][file_prop]
+        if prop_string in self._file_props:
+            return self._file_props[prop_string]
+        elif prop_string in self._file_props['user_defined']:
+            return self._file_props['user_defined'][prop_string]
         else:
             return None
 
@@ -184,12 +187,12 @@ class FileBase(object):
         """Return a dict containing all stored properties of this file."""
         return self._file_props
 
-    def add_error(self, error, info):
-        """Add an error: info pair to _file_props['errors']."""
-        self._file_props['errors'].update({error: info})
+    def add_error(self, error, info_string):
+        """Add an `error`: `info_string` pair to the file."""
+        self._file_props['errors'].update({error: info_string})
 
     def add_file_string(self, file_string):
-        """Add a file descriptor string to _file_props."""
+        """Add a file descriptor string to the file."""
         self._file_props['file_string_set'].add(file_string)
 
     def make_dangerous(self, reason_string=None):
@@ -237,45 +240,57 @@ class FileBase(object):
             self.add_error(e, '')
 
     def force_ext(self, ext):
-        """If dst_path does not end in ext, change it and edit _file_props."""
+        """If dst_path does not end in ext, append .ext to it."""
+        ext = self._check_leading_dot(ext)
         if not self.dst_path.endswith(ext):
-            self.set_property('force_ext', True)
+            # LOG: do we want to log that the extension was changed as below?
+            # self.set_property('force_ext', True)
             self.dst_path += ext
         if not self._file_props['extension'] == ext:
             self.set_property('extension', ext)
 
     def create_metadata_file(self, ext):
-        """Create a separate file to hold metadata from this file."""
+        """
+        Create a separate file to hold extracted metadata.
+
+        The string `ext` will be used as the extension for the metadata file.
+        """
+        ext = self._check_leading_dot(ext)
         try:
-            # make sure we aren't overwriting anything
             if os.path.exists(self.src_path + ext):
-                raise KittenGroomerError("Cannot create split metadata file for \"" +
-                                         self.dst_path + "\", type '" +
-                                         ext + "': File exists.")
+                err_str = ("Could not create metadata file for \"" +
+                           self.filename +
+                           "\": a file with that path already exists.")
+                raise KittenGroomerError(err_str)
             else:
                 dst_dir_path, filename = os.path.split(self.dst_path)
                 if not os.path.exists(dst_dir_path):
                     os.makedirs(dst_dir_path)
-                # TODO: Check extension for leading "."
                 self.metadata_file_path = self.dst_path + ext
                 return self.metadata_file_path
         except KittenGroomerError as e:
             self.add_error(e, '')
             return False
 
+    def _check_leading_dot(self, ext):
+        if len(ext) > 0:
+            if not ext.startswith('.'):
+                return '.' + ext
+        return ext
+
 
 class GroomerLogger(object):
     """Groomer logging interface."""
 
     def __init__(self, root_dir_path, debug=False):
-        self.root_dir = root_dir_path
-        self.log_dir_path = self._make_log_dir(root_dir_path)
-        self.log_path = os.path.join(self.log_dir_path, 'log.txt')
+        self._root_dir_path = root_dir_path
+        self._log_dir_path = self._make_log_dir(root_dir_path)
+        self.log_path = os.path.join(self._log_dir_path, 'log.txt')
         # twiggy.quick_setup(file=self.log_processing)
         # self.log = twiggy.log.name('files')
         if debug:
-            self.log_debug_err = os.path.join(self.log_dir_path, 'debug_stderr.log')
-            self.log_debug_out = os.path.join(self.log_dir_path, 'debug_stdout.log')
+            self.log_debug_err = os.path.join(self._log_dir_path, 'debug_stderr.log')
+            self.log_debug_out = os.path.join(self._log_dir_path, 'debug_stdout.log')
         else:
             self.log_debug_err = os.devnull
             self.log_debug_out = os.devnull
@@ -322,29 +337,29 @@ class GroomerLogger(object):
 class KittenGroomerBase(object):
     """Base object responsible for copy/sanitization process."""
 
-    def __init__(self, src_root_dir, dst_root_dir):
+    def __init__(self, src_root_path, dst_root_path):
         """Initialized with path to source and dest directories."""
-        self.src_root_dir = src_root_dir
-        self.dst_root_dir = dst_root_dir
+        self.src_root_path = src_root_path
+        self.dst_root_path = dst_root_path
 
-    def safe_rmtree(self, directory):
+    def safe_rmtree(self, directory_path):
         """Remove a directory tree if it exists."""
-        if os.path.exists(directory):
-            shutil.rmtree(directory)
+        if os.path.exists(directory_path):
+            shutil.rmtree(directory_path)
 
-    def safe_remove(self, filepath):
-        """Remove a file if it exists."""
-        if os.path.exists(filepath):
-            os.remove(filepath)
+    def safe_remove(self, file_path):
+        """Remove file at file_path if it exists."""
+        if os.path.exists(file_path):
+            os.remove(file_path)
 
-    def safe_mkdir(self, directory):
+    def safe_mkdir(self, directory_path):
         """Make a directory if it does not exist."""
-        if not os.path.exists(directory):
-            os.makedirs(directory)
+        if not os.path.exists(directory_path):
+            os.makedirs(directory_path)
 
-    def list_all_files(self, directory):
+    def list_all_files(self, directory_path):
         """Generator yielding path to all of the files in a directory tree."""
-        for root, dirs, files in os.walk(directory):
+        for root, dirs, files in os.walk(directory_path):
             for filename in files:
                 filepath = os.path.join(root, filename)
                 yield filepath
diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py
index 88500b7..0ebef48 100644
--- a/tests/test_kittengroomer.py
+++ b/tests/test_kittengroomer.py
@@ -189,7 +189,6 @@ class TestFileBase:
         assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
         generic_conf_file.force_ext('.txt')
         assert os.path.splitext(generic_conf_file.dst_path)[1] == '.txt'
-        assert generic_conf_file.get_property('force_ext') is True
         assert generic_conf_file.get_property('extension') == '.txt'
         # should be able to handle weird paths
 
@@ -202,7 +201,6 @@ class TestFileBase:
         # shouldn't change a file's extension if it already is right
 
     def test_create_metadata_file(self, temp_file):
-        # Try making a metadata file
         metadata_file_path = temp_file.create_metadata_file('.metadata.txt')
         with open(metadata_file_path, 'w+') as metadata_file:
             metadata_file.write('Have some metadata!')
@@ -222,8 +220,8 @@ class TestLogger:
     def generic_logger(self, tmpdir):
         return GroomerLogger(tmpdir.strpath)
 
-    def test_tree(self, generic_logger):
-        generic_logger.tree(generic_logger.root_dir)
+    def test_tree(self, generic_logger, tmpdir):
+        generic_logger.tree(tmpdir.strpath)
 
 
 class TestKittenGroomerBase:
@@ -252,6 +250,6 @@ class TestKittenGroomerBase:
         testdir = tmpdir.join('testdir')
         os.mkdir(testdir.strpath)
         simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath)
-        files = simple_groomer.list_all_files(simple_groomer.src_root_dir)
+        files = simple_groomer.list_all_files(simple_groomer.src_root_path)
         assert file.strpath in files
         assert testdir.strpath not in files

From 265f32ad6bc0f6548e9c52387fc038e75d6402fe Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 22 Mar 2017 10:28:00 -0400
Subject: [PATCH 06/21] Change the way description strings are handled

---
 kittengroomer/helpers.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 119d008..2c9a04d 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -56,7 +56,7 @@ class FileBase(object):
             'safety_category': None,
             'symlink': False,
             'copied': False,
-            'file_string_set': set(),
+            'description_string': [],  # array of descriptions to be joined
             'errors': {},
             'user_defined': {}
         }
@@ -164,7 +164,10 @@ class FileBase(object):
         If `prop_string` is part of the file property API, set it to `value`.
         Otherwise, add `prop_string`: `value` to `user_defined` properties.
         """
-        if prop_string in self._file_props.keys():
+        if prop_string is 'description_string':
+            if prop_string not in self._file_props['description_string']:
+                self._file_props['description_string'].append(value)
+        elif prop_string in self._file_props.keys():
             self._file_props[prop_string] = value
         else:
             self._file_props['user_defined'][prop_string] = value
@@ -175,7 +178,6 @@ class FileBase(object):
 
         Returns `None` if `prop_string` cannot be found on the file.
         """
-        # TODO: could probably be refactored
         if prop_string in self._file_props:
             return self._file_props[prop_string]
         elif prop_string in self._file_props['user_defined']:
@@ -191,9 +193,13 @@ class FileBase(object):
         """Add an `error`: `info_string` pair to the file."""
         self._file_props['errors'].update({error: info_string})
 
-    def add_file_string(self, file_string):
-        """Add a file descriptor string to the file."""
-        self._file_props['file_string_set'].add(file_string)
+    def add_description(self, description_string):
+        """
+        Add a description string to the file.
+
+        If `description_string` is already present, will prevent duplicates.
+        """
+        self.set_property('description_string', description_string)
 
     def make_dangerous(self, reason_string=None):
         """
@@ -203,9 +209,10 @@ class FileBase(object):
         to help prevent double-click of death.
         """
         if self.is_dangerous:
+            self.set_property('description_string', reason_string)
             return
         self.set_property('safety_category', 'dangerous')
-        # LOG: store reason string somewhere and do something with it
+        self.set_property('description_string', reason_string)
         path, filename = os.path.split(self.dst_path)
         self.dst_path = os.path.join(path, 'DANGEROUS_{}_DANGEROUS'.format(filename))
 

From 6f9e36a5786b7108c610e5de86e277263d707990 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 22 Mar 2017 12:04:22 -0400
Subject: [PATCH 07/21] Change filecheck for new file description method

* self.add_file_string -> self.add_description
---
 bin/filecheck.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 474506f..329144c 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -222,35 +222,35 @@ class File(FileBase):
         """Empty file or symlink."""
         if self.is_symlink:
             symlink_path = self.get_property('symlink')
-            self.add_file_string('Symlink to {}'.format(symlink_path))
+            self.add_description('Symlink to {}'.format(symlink_path))
         else:
-            self.add_file_string('Inode file')
+            self.add_description('Inode file')
         self.should_copy = False
 
     def unknown(self):
         """Main type should never be unknown."""
-        self.add_file_string('Unknown file')
+        self.add_description('Unknown file')
         self.should_copy = False
 
     def example(self):
         """Used in examples, should never be returned by libmagic."""
-        self.add_file_string('Example file')
+        self.add_description('Example file')
         self.should_copy = False
 
     def multipart(self):
         """Used in web apps, should never be returned by libmagic"""
-        self.add_file_string('Multipart file')
+        self.add_description('Multipart file')
         self.should_copy = False
 
     # ##### Treated as malicious, no reason to have it on a USB key ######
     def message(self):
         """Process a message file."""
-        self.add_file_string('Message file')
+        self.add_description('Message file')
         self.make_dangerous('Message file')
 
     def model(self):
         """Process a model file."""
-        self.add_file_string('Model file')
+        self.add_description('Model file')
         self.make_dangerous('Model file')
 
     # ##### Files that will be converted ######
@@ -258,16 +258,16 @@ class File(FileBase):
         """Process an rtf, ooxml, or plaintext file."""
         for mt in Config.mimes_rtf:
             if mt in self.sub_type:
-                self.add_file_string('Rich Text file')
+                self.add_description('Rich Text file')
                 # TODO: need a way to convert it to plain text
                 self.force_ext('.txt')
                 return
         for mt in Config.mimes_ooxml:
             if mt in self.sub_type:
-                self.add_file_string('OOXML File')
+                self.add_description('OOXML File')
                 self._ooxml()
                 return
-        self.add_file_string('Text file')
+        self.add_description('Text file')
         self.force_ext('.txt')
 
     def application(self):
@@ -277,9 +277,9 @@ class File(FileBase):
                 # TODO: should we change the logic so we don't iterate through all of the subtype methods?
                 # TODO: should these methods return a value?
                 method()
-                self.add_file_string('Application file')
+                self.add_description('Application file')
                 return
-        self.add_file_string('Unknown Application file')
+        self.add_description('Unknown Application file')
         self._unknown_app()
 
     def _executables(self):
@@ -497,7 +497,7 @@ class File(FileBase):
             # TODO: change this from all Exceptions to specific DecompressionBombWarning
             self.add_error(e, "Caught exception (possible decompression bomb?) while translating file {}.".format(self.src_path))
             self.make_dangerous()
-        self.add_file_string('Image file')
+        self.add_description('Image file')
         self.set_property('processing_type', 'image')
 
 

From 52bb566cc3ead20d2f5a16a9c614def59c0c8030 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 22 Mar 2017 12:07:15 -0400
Subject: [PATCH 08/21] Write basic log to log file

---
 kittengroomer/helpers.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 2c9a04d..672e336 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -292,6 +292,7 @@ class GroomerLogger(object):
     def __init__(self, root_dir_path, debug=False):
         self._root_dir_path = root_dir_path
         self._log_dir_path = self._make_log_dir(root_dir_path)
+        # LOG: rename logfile to something more descriptive
         self.log_path = os.path.join(self._log_dir_path, 'log.txt')
         # twiggy.quick_setup(file=self.log_processing)
         # self.log = twiggy.log.name('files')
@@ -311,7 +312,7 @@ class GroomerLogger(object):
 
     def tree(self, base_dir, padding='   '):
         """Write a graphical tree to the log for `base_dir`."""
-        with open(self.log_path, 'ab') as lf:
+        with open(self.log_path, mode='ab') as lf:
             lf.write(bytes('#' * 80 + '\n', 'UTF-8'))
             lf.write(bytes('{}+- {}/\n'.format(padding, os.path.basename(os.path.abspath(base_dir)).encode()), 'utf8'))
             padding += '|  '
@@ -336,9 +337,21 @@ class GroomerLogger(object):
                 s.update(buf)
         return s.hexdigest()
 
+    def _write_file_to_disk(self, file_string):
+        with open(self.log_path, mode='a', encoding='utf-8') as lf:
+            lf.write(file_string)
+
     def add_file(self, file_props):
         """Add a file to the log. Takes a dict of file properties."""
-        pass
+        props = file_props
+        description_string = ', '.join(props['description_string'])
+        file_string = "{}: {}/{}, {}: {}\n".format(props['filename'],
+                                                   props['maintype'],
+                                                   props['subtype'],
+                                                   props['safety_category'],
+                                                   description_string
+                                                   )
+        self._write_file_to_disk(file_string)
 
 
 class KittenGroomerBase(object):

From ba407219d37b0290ec471c7db3c9c20c87725ea6 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 22 Mar 2017 21:49:48 -0400
Subject: [PATCH 09/21] Open log file in text mode instead of bytes mode

---
 kittengroomer/helpers.py | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 672e336..86c0fd6 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -312,19 +312,23 @@ class GroomerLogger(object):
 
     def tree(self, base_dir, padding='   '):
         """Write a graphical tree to the log for `base_dir`."""
-        with open(self.log_path, mode='ab') as lf:
-            lf.write(bytes('#' * 80 + '\n', 'UTF-8'))
-            lf.write(bytes('{}+- {}/\n'.format(padding, os.path.basename(os.path.abspath(base_dir)).encode()), 'utf8'))
+        horizontal_divider = '#' * 80 + '\n'
+        with open(self.log_path, mode='a', encoding='utf-8') as lf:
+            lf.write(horizontal_divider)
+            base_dir_name = os.path.basename(os.path.abspath(base_dir))
+            lf.write('{}+- {}/\n'.format(padding, base_dir_name))
             padding += '|  '
+            # TODO: make sure this gets all sub directories and use scandir()
             files = sorted(os.listdir(base_dir))
             for f in files:
                 curpath = os.path.join(base_dir, f)
                 if os.path.islink(curpath):
-                    lf.write('{}+-- {}\t- Symbolic link to {}\n'.format(padding, f, os.readlink(curpath)).encode(errors='ignore'))
+                    lf.write('{}+-- {}\t- Symbolic link to {}\n'.format(padding, f, os.readlink(curpath)))
                 elif os.path.isdir(curpath):
                     self.tree(curpath, padding)
                 elif os.path.isfile(curpath):
-                    lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)).encode(errors='ignore'))
+                    lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)))
+            lf.write(horizontal_divider)
 
     def _computehash(self, path):
         """Return a sha256 hash of a file at a given path."""
@@ -345,12 +349,13 @@ class GroomerLogger(object):
         """Add a file to the log. Takes a dict of file properties."""
         props = file_props
         description_string = ', '.join(props['description_string'])
-        file_string = "{}: {}/{}, {}: {}\n".format(props['filename'],
-                                                   props['maintype'],
-                                                   props['subtype'],
-                                                   props['safety_category'],
-                                                   description_string
-                                                   )
+        file_string = "  *  {}: {}/{}, {}: {}\n".format(
+            props['filename'],
+            props['maintype'],
+            props['subtype'],
+            props['safety_category'],
+            description_string
+        )
         self._write_file_to_disk(file_string)
 
 

From c85ad27221a606290c78ab75e7f17bf4d3f7abb9 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 10 Apr 2017 12:54:07 +0200
Subject: [PATCH 10/21] New test files

---
 tests/src_valid/dir1/dir2/blah.conf |   1 +
 tests/src_valid/test.zip            | Bin 0 -> 184 bytes
 2 files changed, 1 insertion(+)
 create mode 100644 tests/src_valid/dir1/dir2/blah.conf
 create mode 100644 tests/src_valid/test.zip

diff --git a/tests/src_valid/dir1/dir2/blah.conf b/tests/src_valid/dir1/dir2/blah.conf
new file mode 100644
index 0000000..484ba93
--- /dev/null
+++ b/tests/src_valid/dir1/dir2/blah.conf
@@ -0,0 +1 @@
+This is a test.
diff --git a/tests/src_valid/test.zip b/tests/src_valid/test.zip
new file mode 100644
index 0000000000000000000000000000000000000000..4b911c0a1d439e850190b25a5ad6b3381cdc7394
GIT binary patch
literal 184
zcmWIWW@h1H0D;CP6EEQxr_}|3Y!K#TkYPy5NzBko&d*B=4dG;9W~^_D5S?%{qO^jW
zfsy4KBLf4A2vA{2MrN@B5G5*<q!yRxaRqoYGRZOHGE@SngMk64mtjdGh=pVYE5r&k
U;{&`|*+2>zfzTgF2ZJ~a0Bu4cQvd(}

literal 0
HcmV?d00001


From f0e7607a3fa700627adf5882801159462a5eb449 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 10 Apr 2017 13:00:34 +0200
Subject: [PATCH 11/21] Improve description strings in filecheck

* Description strings that appear in the log improved in filecheck for various
file types
* Added various comments
---
 bin/filecheck.py | 114 ++++++++++++++++++++++-------------------------
 1 file changed, 54 insertions(+), 60 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 329144c..e58d478 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -13,6 +13,7 @@ import officedissector
 import warnings
 import exifread
 from PIL import Image
+# TODO: why do we have this import? How does filecheck handle pngs?
 # from PIL import PngImagePlugin
 from pdfid import PDFiD, cPDFiD
 
@@ -124,11 +125,11 @@ class File(FileBase):
 
     def _check_dangerous(self):
         if not self.has_mimetype:
-            self.make_dangerous('no mimetype')
+            self.make_dangerous('File has no mimetype')
         if not self.has_extension:
-            self.make_dangerous('no extension')
+            self.make_dangerous('File has no extension')
         if self.extension in Config.malicious_exts:
-            self.make_dangerous('malicious_extension')
+            self.make_dangerous('Extension identifies file as potentially dangerous')
 
     def _check_extension(self):
         """
@@ -148,8 +149,7 @@ class File(FileBase):
                 expected_mimetype = Config.aliases[expected_mimetype]
         is_known_extension = self.extension in mimetypes.types_map.keys()
         if is_known_extension and expected_mimetype != self.mimetype:
-            # LOG: improve this string
-            self.make_dangerous('expected_mimetype')
+            self.make_dangerous('Mimetype does not match expected mimetype for this extension')
 
     def _check_mimetype(self):
         """
@@ -166,18 +166,17 @@ class File(FileBase):
                                                              strict=False)
         if expected_extensions:
             if self.has_extension and self.extension not in expected_extensions:
-                # LOG: improve this string
-                self.make_dangerous('expected extensions')
+                self.make_dangerous('Extension does not match expected extensions for this mimetype')
 
     def _check_filename(self):
         if self.filename[0] is '.':
-            # TODO: handle dotfiles?
+            # TODO: handle dotfiles here
             pass
         right_to_left_override = u"\u202E"
         if right_to_left_override in self.filename:
             self.make_dangerous('Filename contains dangerous character')
             self.dst_path = self.dst_path.replace(right_to_left_override, '')
-            # TODO: change self.filename and'filename' property?
+            # TODO: change self.filename and'filename' property? Or should those reflect the values on the source key
 
     def check(self):
         self._check_dangerous()
@@ -222,14 +221,14 @@ class File(FileBase):
         """Empty file or symlink."""
         if self.is_symlink:
             symlink_path = self.get_property('symlink')
-            self.add_description('Symlink to {}'.format(symlink_path))
+            self.add_description('File is a symlink to {}'.format(symlink_path))
         else:
-            self.add_description('Inode file')
+            self.add_description('File is an inode (empty file)')
         self.should_copy = False
 
     def unknown(self):
         """Main type should never be unknown."""
-        self.add_description('Unknown file')
+        self.add_description('Unknown mimetype')
         self.should_copy = False
 
     def example(self):
@@ -239,35 +238,33 @@ class File(FileBase):
 
     def multipart(self):
         """Used in web apps, should never be returned by libmagic"""
-        self.add_description('Multipart file')
+        self.add_description('Multipart file - usually found in web apps')
         self.should_copy = False
 
     # ##### Treated as malicious, no reason to have it on a USB key ######
     def message(self):
         """Process a message file."""
-        self.add_description('Message file')
-        self.make_dangerous('Message file')
+        self.make_dangerous('Message file - should not be found on USB key')
 
     def model(self):
         """Process a model file."""
-        self.add_description('Model file')
-        self.make_dangerous('Model file')
+        self.make_dangerous('Model file - should not be found on USB key')
 
     # ##### Files that will be converted ######
     def text(self):
         """Process an rtf, ooxml, or plaintext file."""
         for mt in Config.mimes_rtf:
             if mt in self.sub_type:
-                self.add_description('Rich Text file')
+                self.add_description('Rich Text (rtf) file')
                 # TODO: need a way to convert it to plain text
                 self.force_ext('.txt')
                 return
         for mt in Config.mimes_ooxml:
             if mt in self.sub_type:
-                self.add_description('OOXML File')
+                self.add_description('OOXML (openoffice) file')
                 self._ooxml()
                 return
-        self.add_description('Text file')
+        self.add_description('Plain text file')
         self.force_ext('.txt')
 
     def application(self):
@@ -277,103 +274,98 @@ class File(FileBase):
                 # TODO: should we change the logic so we don't iterate through all of the subtype methods?
                 # TODO: should these methods return a value?
                 method()
-                self.add_description('Application file')
                 return
-        self.add_description('Unknown Application file')
         self._unknown_app()
 
     def _executables(self):
         """Process an executable file."""
         # LOG: change the processing_type property to some other name or include in file_string
-        self.set_property('processing_type', 'executable')
-        self.make_dangerous('executable')
+        self.make_dangerous('Executable file')
 
     def _winoffice(self):
         """Process a winoffice file using olefile/oletools."""
-        # LOG: processing_type property
-        self.set_property('processing_type', 'WinOffice')
         oid = oletools.oleid.OleID(self.src_path)  # First assume a valid file
         if not olefile.isOleFile(self.src_path):
             # Manual processing, may already count as suspicious
             try:
                 ole = olefile.OleFileIO(self.src_path, raise_defects=olefile.DEFECT_INCORRECT)
             except:
-                self.make_dangerous('not parsable')
+                self.make_dangerous('Unparsable WinOffice file')
             if ole.parsing_issues:
-                self.make_dangerous('parsing issues')
+                self.make_dangerous('Parsing issues with WinOffice file')
             else:
                 if ole.exists('macros/vba') or ole.exists('Macros') \
                         or ole.exists('_VBA_PROJECT_CUR') or ole.exists('VBA'):
-                    self.make_dangerous('macro')
+                    self.make_dangerous('WinOffice file containing a macro')
         else:
             indicators = oid.check()
             # Encrypted can be set by multiple checks on the script
             if oid.encrypted.value:
-                self.make_dangerous('encrypted')
+                self.make_dangerous('Encrypted WinOffice file')
             if oid.macros.value or oid.ole.exists('macros/vba') or oid.ole.exists('Macros') \
                     or oid.ole.exists('_VBA_PROJECT_CUR') or oid.ole.exists('VBA'):
-                self.make_dangerous('macro')
+                self.make_dangerous('WinOffice file containing a macro')
             for i in indicators:
                 if i.id == 'ObjectPool' and i.value:
-                    # TODO: Is it suspicious?
+                    # TODO: is having an ObjectPool suspicious?
                     # LOG: user defined property
-                    self.set_property('objpool', True)
+                    self.add_description('WinOffice file containing an object pool')
                 elif i.id == 'flash' and i.value:
-                    self.make_dangerous('flash')
+                    self.make_dangerous('WinOffice file with embedded flash')
+        self.add_description('WinOffice file')
 
     def _ooxml(self):
         """Process an ooxml file."""
-        # LOG: processing_type property
-        self.set_property('processing_type', 'ooxml')
         try:
             doc = officedissector.doc.Document(self.src_path)
         except Exception:
-            self.make_dangerous('invalid ooxml file')
+            self.make_dangerous('Invalid ooxml file')
             return
         # There are probably other potentially malicious features:
         # fonts, custom props, custom XML
         if doc.is_macro_enabled or len(doc.features.macros) > 0:
-            self.make_dangerous('macro')
+            self.make_dangerous('Ooxml file containing macro')
         if len(doc.features.embedded_controls) > 0:
-            self.make_dangerous('activex')
+            self.make_dangerous('Ooxml file with activex')
         if len(doc.features.embedded_objects) > 0:
             # Exploited by CVE-2014-4114 (OLE)
-            self.make_dangerous('embedded obj')
+            self.make_dangerous('Ooxml file with embedded objects')
         if len(doc.features.embedded_packages) > 0:
-            self.make_dangerous('embedded pack')
+            self.make_dangerous('Ooxml file with embedded packages')
 
     def _libreoffice(self):
         """Process a libreoffice file."""
-        self.set_property('processing_type', 'libreoffice')
         # As long as there is no way to do a sanity check on the files => dangerous
         try:
             lodoc = zipfile.ZipFile(self.src_path, 'r')
         except:
-            # TODO: are there specific exceptions we should catch here? Or is anything ok
-            self.make_dangerous('invalid libreoffice file')
+            # TODO: are there specific exceptions we should catch here? Or should it be everything
+            self.make_dangerous('Invalid libreoffice file')
         for f in lodoc.infolist():
             fname = f.filename.lower()
             if fname.startswith('script') or fname.startswith('basic') or \
                     fname.startswith('object') or fname.endswith('.bin'):
-                self.make_dangerous('macro')
+                self.make_dangerous('Libreoffice file containing executable code')
+        if not self.is_dangerous:
+            self.add_description('Libreoffice file')
 
     def _pdf(self):
         """Process a PDF file."""
-        # LOG: processing_type property
-        self.set_property('processing_type', 'pdf')
         xmlDoc = PDFiD(self.src_path)
         oPDFiD = cPDFiD(xmlDoc, True)
-        # TODO: are there other characteristics which should be dangerous?
+        # TODO: are there other pdf characteristics which should be dangerous?
         if oPDFiD.encrypt.count > 0:
-            self.make_dangerous('encrypted pdf')
+            self.make_dangerous('Encrypted pdf')
         if oPDFiD.js.count > 0 or oPDFiD.javascript.count > 0:
-            self.make_dangerous('pdf with javascript')
+            self.make_dangerous('Pdf with embedded javascript')
         if oPDFiD.aa.count > 0 or oPDFiD.openaction.count > 0:
-            self.make_dangerous('openaction')
+            self.make_dangerous('Pdf with openaction(s)')
         if oPDFiD.richmedia.count > 0:
-            self.make_dangerous('flash')
+            self.make_dangerous('Pdf containing flash')
         if oPDFiD.launch.count > 0:
-            self.make_dangerous('launch')
+            self.make_dangerous('Pdf with launch action(s)')
+        if not self.is_dangerous:
+            self.add_description('Pdf file')
 
     def _archive(self):
         """
@@ -383,24 +375,26 @@ class File(FileBase):
         is called on that directory. The recursive archive depth is increased
         to protect against archive bombs.
         """
-        # LOG: change this to something archive specific
-        self.set_property('processing_type', 'archive')
+        # TODO: change this to something archive type specific instead of generic 'Archive'
+        self.add_description('Archive')
         self.should_copy = False
         self.is_recursive = True
 
     def _unknown_app(self):
         """Process an unknown file."""
+        self.add_description('Unknown application file')
         self.make_unknown()
 
     def _binary_app(self):
         """Process an unknown binary file."""
+        self.add_description('Unknown binary file')
         self.make_binary()
 
     #######################
     # Metadata extractors
     def _metadata_exif(self, metadata_file_path):
         """Read exif metadata from a jpg or tiff file using exifread."""
-        # TODO: this method is kind of long, can we shorten it somehow?
+        # TODO: can we shorten this method somehow?
         img = open(self.src_path, 'rb')
         tags = None
         try:
@@ -424,7 +418,7 @@ class File(FileBase):
                     tag_string = str(tag_value)
                 with open(metadata_file_path, 'w+') as metadata_file:
                     metadata_file.write("Key: {}\tValue: {}\n".format(tag, tag_string))
-        # LOG: how do we want to log metadata?
+        # TODO: how do we want to log metadata?
         self.set_property('metadata', 'exif')
         img.close()
         return True
@@ -461,17 +455,17 @@ class File(FileBase):
     # ##### Media - audio and video aren't converted ######
     def audio(self):
         """Process an audio file."""
-        self.log_string += 'Audio file'
+        self.add_description('Audio file')
         self._media_processing()
 
     def video(self):
         """Process a video."""
-        self.log_string += 'Video file'
+        self.add_description('Video file')
         self._media_processing()
 
     def _media_processing(self):
         """Generic way to process all media files."""
-        self.set_property('processing_type', 'media')
+        self.add_description('Media file')
 
     def image(self):
         """

From 3f49612a23be69d1da05d274d4c8a4860978e5a2 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 10 Apr 2017 13:07:21 +0200
Subject: [PATCH 12/21] Add new logger, move logging to filecheck

* Wrote a new text-based logger that displays all file information in the tree
instead of using two separate logs
* Stopped using twiggy since it wasn't giving us anything useful
* Moved a lot of the logging code to filecheck, since it didn't really seem
appropriate as an API. Left a Logging stub in kittengroomer to hold methods
that might be useful for implementing other loggers.
* For the new logger, had to change the way that we traverse the items in the
source file tree.
---
 bin/filecheck.py            | 144 ++++++++++++++++++++++++++++++------
 kittengroomer/__init__.py   |   2 +-
 kittengroomer/helpers.py    |  67 +----------------
 tests/test_kittengroomer.py |   9 +--
 4 files changed, 129 insertions(+), 93 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index e58d478..8200eeb 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -6,6 +6,7 @@ import shlex
 import subprocess
 import zipfile
 import argparse
+import shutil
 
 import oletools.oleid
 import olefile
@@ -17,7 +18,7 @@ from PIL import Image
 # from PIL import PngImagePlugin
 from pdfid import PDFiD, cPDFiD
 
-from kittengroomer import FileBase, KittenGroomerBase, GroomerLogger
+from kittengroomer import FileBase, KittenGroomerBase, Logging
 
 
 SEVENZ_PATH = '/usr/bin/7z'
@@ -90,6 +91,7 @@ class File(FileBase):
         super(File, self).__init__(src_path, dst_path)
         self.is_recursive = False
         self.logger = logger
+        self.tempdir_path = self.dst_path + '_temp'
 
         subtypes_apps = [
             (Config.mimes_office, self._winoffice),
@@ -190,7 +192,12 @@ class File(FileBase):
 
     def write_log(self):
         props = self.get_all_props()
-        self.logger.add_file(props)
+        if not self.is_recursive:
+            if os.path.exists(self.tempdir_path):
+                # Hack to make images appear at the correct tree depth in log
+                self.logger.add_file(self.src_path, props, in_tempdir=True)
+                return
+        self.logger.add_file(self.src_path, props)
 
     # ##### Helper functions #####
     def _make_method_dict(self, list_of_tuples):
@@ -210,7 +217,6 @@ class File(FileBase):
 
     def make_tempdir(self):
         """Make a temporary directory at self.tempdir_path."""
-        self.tempdir_path = self.dst_path + '_temp'
         if not os.path.exists(self.tempdir_path):
             os.makedirs(self.tempdir_path)
         return self.tempdir_path
@@ -490,9 +496,93 @@ class File(FileBase):
         except Exception as e:  # Catch decompression bombs
             # TODO: change this from all Exceptions to specific DecompressionBombWarning
             self.add_error(e, "Caught exception (possible decompression bomb?) while translating file {}.".format(self.src_path))
-            self.make_dangerous()
-        self.add_description('Image file')
-        self.set_property('processing_type', 'image')
+            self.make_dangerous('Image file containing decompression bomb')
+        if not self.is_dangerous:
+            self.add_description('Image file')
+
+
+class GroomerLogger(object):
+    """Groomer logging interface."""
+
+    def __init__(self, src_root_path, dst_root_path, debug=False):
+        self._src_root_path = src_root_path
+        self._dst_root_path = dst_root_path
+        self._log_dir_path = self._make_log_dir(dst_root_path)
+        self.log_path = os.path.join(self._log_dir_path, 'circlean_log.txt')
+        self._add_root_dir(src_root_path)
+        if debug:
+            self.log_debug_err = os.path.join(self._log_dir_path, 'debug_stderr.log')
+            self.log_debug_out = os.path.join(self._log_dir_path, 'debug_stdout.log')
+        else:
+            self.log_debug_err = os.devnull
+            self.log_debug_out = os.devnull
+
+    def _make_log_dir(self, root_dir_path):
+        """Make the directory in the dest dir that will hold the logs"""
+        log_dir_path = os.path.join(root_dir_path, 'logs')
+        if os.path.exists(log_dir_path):
+            shutil.rmtree(log_dir_path)
+        os.makedirs(log_dir_path)
+        return log_dir_path
+
+    def _add_root_dir(self, root_path):
+        dirname = os.path.split(root_path)[1] + '/'
+        with open(self.log_path, mode='ab') as lf:
+            lf.write(bytes(dirname, 'utf-8'))
+            lf.write(b'\n')
+
+    def add_file(self, file_path, file_props, in_tempdir=False):
+        """Add a file to the log. Takes a dict of file properties."""
+        # TODO: fix var names in this method
+        # TODO: handle symlinks better: symlink_string = '{}+-- {}\t- Symbolic link to {}\n'.format(padding, f, os.readlink(curpath))
+        props = file_props
+        depth = self._get_path_depth(file_path)
+        description_string = ', '.join(props['description_string'])
+        file_hash = Logging.computehash(file_path)[:6]
+        if props['safety_category'] is None:
+            descr_cat = "Normal"
+        else:
+            descr_cat = props['safety_category'].capitalize()
+        # TODO: make size adjust to MB/GB for large files
+        size = str(props['file_size']) + 'B'
+        file_template = "+- {name} ({sha_hash}): {size}, {mt}/{st}. {desc}: {desc_str}"
+        file_string = file_template.format(
+            name=props['filename'],
+            sha_hash=file_hash,
+            size=size,
+            mt=props['maintype'],
+            st=props['subtype'],
+            desc=descr_cat,
+            desc_str=description_string,
+            # errs=''  # TODO: add errors in human readable form here
+        )
+        if in_tempdir:
+            depth -= 1
+        self._write_line_to_log(file_string, depth)
+
+    def add_dir(self, dir_path):
+        path_depth = self._get_path_depth(dir_path)
+        dirname = os.path.split(dir_path)[1] + '/'
+        log_line = '+- ' + dirname
+        self._write_line_to_log(log_line, path_depth)
+
+    def _get_path_depth(self, path):
+        if self._dst_root_path in path:
+            base_path = self._dst_root_path
+        elif self._src_root_path in path:
+            base_path = self._src_root_path
+        relpath = os.path.relpath(path, base_path)
+        path_depth = relpath.count(os.path.sep)
+        return path_depth
+
+    def _write_line_to_log(self, line, indentation_depth):
+        # TODO: should we use fsencode and fsdecode here instead of just bytestrings?
+        padding = b'   '
+        padding += b'|  ' * indentation_depth
+        with open(self.log_path, mode='ab') as lf:
+            lf.write(padding)
+            lf.write(bytes(line, encoding='utf-8'))
+            lf.write(b'\n')
 
 
 class KittenGroomerFileCheck(KittenGroomerBase):
@@ -502,20 +592,17 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         self.recursive_archive_depth = 0
         self.max_recursive_depth = max_recursive_depth
         self.cur_file = None
-        self.logger = GroomerLogger(self.dst_root_path, debug)
+        self.logger = GroomerLogger(root_src, root_dst, debug)
 
     def process_dir(self, src_dir, dst_dir):
         """Process a directory on the source key."""
-        self.logger.tree(src_dir)
-        for srcpath in self.list_all_files(src_dir):
-            dstpath = srcpath.replace(src_dir, dst_dir)
-            # TODO: Can we clean up the way we handle relative_path?
-            # Relative path is here so that when we print files in the log it
-            # shows only the file's path. Should we just pass it to the logger
-            # when we create it? Or let the logger figure it out?
-            # relative_path = srcpath.replace(src_dir + '/', '')
-            self.cur_file = File(srcpath, dstpath, self.logger)
-            self.process_file(self.cur_file)
+        for srcpath in self.list_files_dirs(src_dir):
+            if os.path.isdir(srcpath):
+                self.logger.add_dir(srcpath)
+            else:
+                dstpath = os.path.join(dst_dir, os.path.basename(srcpath))
+                self.cur_file = File(srcpath, dstpath, self.logger)
+                self.process_file(self.cur_file)
 
     def process_file(self, file):
         """
@@ -525,12 +612,13 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         the file to the destionation key, and clean up temporary directory.
         """
         file.check()
-        if file.is_recursive:
-            self.process_archive(file)
-        elif file.should_copy:
+        if file.should_copy:
             file.safe_copy()
             file.set_property('copied', True)
-        file.write_log()
+            file.write_log()
+        if file.is_recursive:
+            self.process_archive(file)
+        # TODO: Can probably handle cleaning up the tempdir better
         if hasattr(file, 'tempdir_path'):
             self.safe_rmtree(file.tempdir_path)
 
@@ -547,11 +635,12 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         else:
             tempdir_path = file.make_tempdir()
             # TODO: double check we are properly escaping file.src_path
-            # otherwise we are running unvalidated user input directly in the shell
+            # otherwise we are running unsanitized user input directly in the shell
             command_str = '{} -p1 x "{}" -o"{}" -bd -aoa'
             unpack_command = command_str.format(SEVENZ_PATH,
                                                 file.src_path, tempdir_path)
             self._run_process(unpack_command)
+            file.write_log()
             self.process_dir(tempdir_path, file.dst_path)
             self.safe_rmtree(tempdir_path)
         self.recursive_archive_depth -= 1
@@ -566,6 +655,17 @@ class KittenGroomerFileCheck(KittenGroomerBase):
                 return
         return True
 
+    def list_files_dirs(self, root_dir_path):
+        queue = []
+        for path in sorted(os.listdir(root_dir_path), key=lambda x: str.lower(x)):
+            full_path = os.path.join(root_dir_path, path)
+            if os.path.isdir(full_path):
+                queue.append(full_path)
+                queue += self.list_files_dirs(full_path)  # if path is a dir, recurse through its contents
+            elif os.path.isfile(full_path):
+                queue.append(full_path)
+        return queue
+
     def run(self):
         self.process_dir(self.src_root_path, self.dst_root_path)
 
diff --git a/kittengroomer/__init__.py b/kittengroomer/__init__.py
index 8428553..25a7965 100644
--- a/kittengroomer/__init__.py
+++ b/kittengroomer/__init__.py
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-from .helpers import FileBase, KittenGroomerBase, GroomerLogger, main
+from .helpers import FileBase, KittenGroomerBase, Logging, main
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 86c0fd6..6411404 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -14,7 +14,6 @@ import shutil
 import argparse
 
 import magic
-# import twiggy
 
 
 class KittenGroomerError(Exception):
@@ -286,52 +285,11 @@ class FileBase(object):
         return ext
 
 
-class GroomerLogger(object):
-    """Groomer logging interface."""
+class Logging(object):
 
-    def __init__(self, root_dir_path, debug=False):
-        self._root_dir_path = root_dir_path
-        self._log_dir_path = self._make_log_dir(root_dir_path)
-        # LOG: rename logfile to something more descriptive
-        self.log_path = os.path.join(self._log_dir_path, 'log.txt')
-        # twiggy.quick_setup(file=self.log_processing)
-        # self.log = twiggy.log.name('files')
-        if debug:
-            self.log_debug_err = os.path.join(self._log_dir_path, 'debug_stderr.log')
-            self.log_debug_out = os.path.join(self._log_dir_path, 'debug_stdout.log')
-        else:
-            self.log_debug_err = os.devnull
-            self.log_debug_out = os.devnull
-
-    def _make_log_dir(self, root_dir_path):
-        log_dir_path = os.path.join(root_dir_path, 'logs')
-        if os.path.exists(log_dir_path):
-            shutil.rmtree(log_dir_path)
-        os.makedirs(log_dir_path)
-        return log_dir_path
-
-    def tree(self, base_dir, padding='   '):
-        """Write a graphical tree to the log for `base_dir`."""
-        horizontal_divider = '#' * 80 + '\n'
-        with open(self.log_path, mode='a', encoding='utf-8') as lf:
-            lf.write(horizontal_divider)
-            base_dir_name = os.path.basename(os.path.abspath(base_dir))
-            lf.write('{}+- {}/\n'.format(padding, base_dir_name))
-            padding += '|  '
-            # TODO: make sure this gets all sub directories and use scandir()
-            files = sorted(os.listdir(base_dir))
-            for f in files:
-                curpath = os.path.join(base_dir, f)
-                if os.path.islink(curpath):
-                    lf.write('{}+-- {}\t- Symbolic link to {}\n'.format(padding, f, os.readlink(curpath)))
-                elif os.path.isdir(curpath):
-                    self.tree(curpath, padding)
-                elif os.path.isfile(curpath):
-                    lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)))
-            lf.write(horizontal_divider)
-
-    def _computehash(self, path):
-        """Return a sha256 hash of a file at a given path."""
+    @staticmethod
+    def computehash(path):
+        """Return the sha256 hash of a file at a given path."""
         s = hashlib.sha256()
         with open(path, 'rb') as f:
             while True:
@@ -341,23 +299,6 @@ class GroomerLogger(object):
                 s.update(buf)
         return s.hexdigest()
 
-    def _write_file_to_disk(self, file_string):
-        with open(self.log_path, mode='a', encoding='utf-8') as lf:
-            lf.write(file_string)
-
-    def add_file(self, file_props):
-        """Add a file to the log. Takes a dict of file properties."""
-        props = file_props
-        description_string = ', '.join(props['description_string'])
-        file_string = "  *  {}: {}/{}, {}: {}\n".format(
-            props['filename'],
-            props['maintype'],
-            props['subtype'],
-            props['safety_category'],
-            description_string
-        )
-        self._write_file_to_disk(file_string)
-
 
 class KittenGroomerBase(object):
     """Base object responsible for copy/sanitization process."""
diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py
index 0ebef48..538314d 100644
--- a/tests/test_kittengroomer.py
+++ b/tests/test_kittengroomer.py
@@ -5,7 +5,7 @@ import os
 
 import pytest
 
-from kittengroomer import FileBase, KittenGroomerBase, GroomerLogger
+from kittengroomer import FileBase, KittenGroomerBase
 
 skip = pytest.mark.skip
 xfail = pytest.mark.xfail
@@ -216,12 +216,7 @@ class TestFileBase:
 
 class TestLogger:
 
-    @fixture
-    def generic_logger(self, tmpdir):
-        return GroomerLogger(tmpdir.strpath)
-
-    def test_tree(self, generic_logger, tmpdir):
-        generic_logger.tree(tmpdir.strpath)
+    pass
 
 
 class TestKittenGroomerBase:

From 67c90087bad42d54b44044c3d1afae02538c456f Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 10 Apr 2017 13:13:22 +0200
Subject: [PATCH 13/21] Add time information to test logs

---
 tests/logging.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/logging.py b/tests/logging.py
index c06ad94..a6d4443 100644
--- a/tests/logging.py
+++ b/tests/logging.py
@@ -1,4 +1,5 @@
 import os
+from datetime import datetime
 
 
 def save_logs(groomer, test_description):
@@ -6,6 +7,9 @@ def save_logs(groomer, test_description):
     test_log_path = 'tests/test_logs/{}.log'.format(test_description)
     with open(test_log_path, 'w+') as test_log:
         test_log.write(divider.format('TEST LOG'))
+        test_log.write(str(datetime.now().time()) + '\n')
+        test_log.write(test_description + '\n')
+        test_log.write('-' * 20 + '\n')
         with open(groomer.logger.log_path, 'r') as logfile:
             log = logfile.read()
             test_log.write(log)

From c43ac0697abbc6c045e4a67f18b30dc423b2ef9f Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 10 Apr 2017 13:18:27 +0200
Subject: [PATCH 14/21] Add comments/notes to helpers.py

---
 kittengroomer/helpers.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 6411404..c7ff3a5 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -3,8 +3,8 @@
 
 """
 Contains the base objects for use when creating a sanitizer using
-PyCIRCLean. Subclass FileBase and KittenGroomerBase to implement your
-desired behavior.
+PyCIRCLean. Subclass or import from FileBase/KittenGroomerBase and implement
+your desired behavior.
 """
 
 
@@ -87,9 +87,9 @@ class FileBase(object):
         else:
             try:
                 mt = magic.from_file(self.src_path, mime=True)
-                # Note: magic will always return something, even if it's just 'data'
+                # Note: libmagic will always return something, even if it's just 'data'
             except UnicodeEncodeError as e:
-                # FIXME: The encoding of the file is broken (possibly UTF-16)
+                # FIXME: The encoding of the file that triggers this is broken (possibly it's UTF-16 and Python expects utf8)
                 # Note: one of the Travis files will trigger this exception
                 self.add_error(e, '')
                 mt = None
@@ -118,8 +118,6 @@ class FileBase(object):
     @property
     def has_mimetype(self):
         """True if file has a main and sub mimetype, else False."""
-        # TODO: broken mimetype checks should be done somewhere else.
-        # Should the check be by default or should we let the API consumer write it?
         if not self.main_type or not self.sub_type:
             return False
         else:
@@ -326,19 +324,20 @@ class KittenGroomerBase(object):
     def list_all_files(self, directory_path):
         """Generator yielding path to all of the files in a directory tree."""
         for root, dirs, files in os.walk(directory_path):
+            # files is a list anyway so we don't get much from using a generator here
             for filename in files:
                 filepath = os.path.join(root, filename)
                 yield filepath
 
     #######################
 
-    # TODO: feels like this function doesn't need to exist if we move main()
+    # TODO: if we move main() we can get rid of this as well
     def processdir(self, src_dir, dst_dir):
         """Implement this function to define file processing behavior."""
         raise ImplementationRequired('Please implement processdir.')
 
 
-# TODO: Maybe this shouldn't exist? It should probably get moved to filecheck since this isn't really API code
+# TODO: Should this get moved to filecheck? It isn't really API code and somebody can implement it themselves
 def main(kg_implementation, description='Call a KittenGroomer implementation to process files present in the source directory and copy them to the destination directory.'):
     parser = argparse.ArgumentParser(prog='KittenGroomer', description=description)
     parser.add_argument('-s', '--source', type=str, help='Source directory')

From 3e56787686a3a0275966a8ccae9c8ca13ca525ee Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 10 Apr 2017 13:18:51 +0200
Subject: [PATCH 15/21] Update tests for new logger

---
 tests/test_filecheck.py | 54 ++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 31 deletions(-)

diff --git a/tests/test_filecheck.py b/tests/test_filecheck.py
index f58152d..e7aeaac 100644
--- a/tests/test_filecheck.py
+++ b/tests/test_filecheck.py
@@ -13,44 +13,26 @@ try:
 except ImportError:
     NODEPS = True
 
+fixture = pytest.fixture
+skip = pytest.mark.skip
 skipif_nodeps = pytest.mark.skipif(NODEPS,
                                    reason="Dependencies aren't installed")
 
 
 @skipif_nodeps
-class TestIntegration:
+class TestSystem:
 
-    @pytest.fixture
-    def src_valid_path(self):
-        return os.path.join(os.getcwd(), 'tests/src_valid')
+    @fixture
+    def valid_groomer(self):
+        src_path = os.path.join(os.getcwd(), 'tests/src_valid')
+        dst_path = self.make_dst_dir_path(src_path)
+        return KittenGroomerFileCheck(src_path, dst_path, debug=True)
 
-    @pytest.fixture
-    def src_invalid_path(self):
-        return os.path.join(os.getcwd(), 'tests/src_invalid')
-
-    @pytest.fixture
-    def dst(self):
-        return os.path.join(os.getcwd(), 'tests/dst')
-
-    def test_filecheck_src_invalid(self, src_invalid_path):
-        dst_path = self.make_dst_dir_path(src_invalid_path)
-        groomer = KittenGroomerFileCheck(src_invalid_path, dst_path, debug=True)
-        groomer.run()
-        test_description = "filecheck_invalid"
-        save_logs(groomer, test_description)
-
-    def test_filecheck_2(self, src_valid_path):
-        dst_path = self.make_dst_dir_path(src_valid_path)
-        groomer = KittenGroomerFileCheck(src_valid_path, dst_path, debug=True)
-        groomer.run()
-        test_description = "filecheck_valid"
-        save_logs(groomer, test_description)
-
-    def test_processdir(self):
-        pass
-
-    def test_handle_archives(self):
-        pass
+    @fixture
+    def invalid_groomer(self):
+        src_path = os.path.join(os.getcwd(), 'tests/src_invalid')
+        dst_path = self.make_dst_dir_path(src_path)
+        return KittenGroomerFileCheck(src_path, dst_path, debug=True)
 
     def make_dst_dir_path(self, src_dir_path):
         dst_path = src_dir_path + '_dst'
@@ -58,6 +40,16 @@ class TestIntegration:
         os.makedirs(dst_path, exist_ok=True)
         return dst_path
 
+    def test_filecheck_src_valid(self, valid_groomer):
+        valid_groomer.run()
+        test_description = "filecheck_valid"
+        save_logs(valid_groomer, test_description)
+
+    def test_filecheck_src_invalid(self, invalid_groomer):
+        invalid_groomer.run()
+        test_description = "filecheck_invalid"
+        save_logs(invalid_groomer, test_description)
+
 
 class TestFileHandling:
     def test_autorun(self):

From 5865ddd94d0374196c4ee904cc2a27f0d27f18a7 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 10 Apr 2017 13:19:30 +0200
Subject: [PATCH 16/21] Make root paths abspaths

* src_root_path and dst_root_path are now converted to abspaths when
initializing kittengroomer to avoid any weird issues with relative or misformed
paths
---
 kittengroomer/helpers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index c7ff3a5..64e7e51 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -303,8 +303,8 @@ class KittenGroomerBase(object):
 
     def __init__(self, src_root_path, dst_root_path):
         """Initialized with path to source and dest directories."""
-        self.src_root_path = src_root_path
-        self.dst_root_path = dst_root_path
+        self.src_root_path = os.path.abspath(src_root_path)
+        self.dst_root_path = os.path.abspath(dst_root_path)
 
     def safe_rmtree(self, directory_path):
         """Remove a directory tree if it exists."""

From 13460d643dff6a3b4ff74368b08a673f1123cbf5 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 10 Apr 2017 13:25:51 +0200
Subject: [PATCH 17/21] Remove twiggy from install requirements

---
 CHANGELOG.md         | 9 +++++++++
 dev-requirements.txt | 1 -
 setup.py             | 2 +-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index df1a217..2158f1b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,15 @@
 Changelog
 =========
 
+2.2.0 (in progress)
+---
+New features:
+-
+
+Fixes:
+-
+
+
 2.1.0
 ---
 
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 8c42eaa..8d2f7f4 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,4 +1,3 @@
-twiggy
 python-magic
 pytest
 pytest-cov
diff --git a/setup.py b/setup.py
index 4397c5f..21b03f9 100644
--- a/setup.py
+++ b/setup.py
@@ -23,5 +23,5 @@ setup(
         'Topic :: Communications :: File Sharing',
         'Topic :: Security',
     ],
-    install_requires=['twiggy', 'python-magic'],
+    install_requires=['python-magic'],
 )

From 053f30db932d9b4a9a8956fb4a4ebe29cfd14a45 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 10 Apr 2017 13:33:00 +0200
Subject: [PATCH 18/21] Partial update to changelog

---
 CHANGELOG.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2158f1b..3a4c16e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,11 @@ Changelog
 2.2.0 (in progress)
 ---
 New features:
--
+- Filecheck.py configuration information is now conveniently held in a Config
+object instead of in globals
+- New easier to read text-based logger (removed twiggy dependency)
+- Various filetypes in filecheck.py now have improved descriptions for log
+- Improved the interface for adding file descriptions to files
 
 Fixes:
 -

From 45d71cb362c3bd0bd5442e6dc482d22749c7aabc Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 10 Apr 2017 13:39:28 +0200
Subject: [PATCH 19/21] Fix unicode filename issues using fsencode

* Same problem we've had before - linux filenames can have non-unicode chars
in them
* We need to write the filename as raw bytes to the log
* os.fsencode lets us convert a utf-8 encoded string to bytes and ignore those
that can't be printed as unicode
* Still not clear if the log generated this way will be human-readable
---
 bin/filecheck.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 8200eeb..19cb93f 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -576,12 +576,12 @@ class GroomerLogger(object):
         return path_depth
 
     def _write_line_to_log(self, line, indentation_depth):
-        # TODO: should we use fsencode and fsdecode here instead of just bytestrings?
         padding = b'   '
         padding += b'|  ' * indentation_depth
+        line_bytes = os.fsencode(line)
         with open(self.log_path, mode='ab') as lf:
             lf.write(padding)
-            lf.write(bytes(line, encoding='utf-8'))
+            lf.write(line_bytes)
             lf.write(b'\n')
 
 

From a8179d06885d72e49335263ef2633f3128a20ee7 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 12 Apr 2017 15:32:57 -0400
Subject: [PATCH 20/21] Read test logs in bytes mode

---
 tests/logging.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/logging.py b/tests/logging.py
index a6d4443..c85d967 100644
--- a/tests/logging.py
+++ b/tests/logging.py
@@ -10,16 +10,16 @@ def save_logs(groomer, test_description):
         test_log.write(str(datetime.now().time()) + '\n')
         test_log.write(test_description + '\n')
         test_log.write('-' * 20 + '\n')
-        with open(groomer.logger.log_path, 'r') as logfile:
+        with open(groomer.logger.log_path, 'rb') as logfile:
             log = logfile.read()
             test_log.write(log)
         if os.path.exists(groomer.logger.log_debug_err):
             test_log.write(divider.format('ERR LOG'))
-            with open(groomer.logger.log_debug_err, 'r') as debug_err:
+            with open(groomer.logger.log_debug_err, 'rb') as debug_err:
                 err = debug_err.read()
                 test_log.write(err)
         if os.path.exists(groomer.logger.log_debug_out):
             test_log.write(divider.format('OUT LOG'))
-            with open(groomer.logger.log_debug_out, 'r') as debug_out:
+            with open(groomer.logger.log_debug_out, 'rb') as debug_out:
                 out = debug_out.read()
                 test_log.write(out)

From d470d6bb21aeea0ce86cd435210458de6337dac6 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 12 Apr 2017 15:50:02 -0400
Subject: [PATCH 21/21] Open test log in bytes mode

---
 tests/logging.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/tests/logging.py b/tests/logging.py
index c85d967..e705231 100644
--- a/tests/logging.py
+++ b/tests/logging.py
@@ -5,21 +5,24 @@ from datetime import datetime
 def save_logs(groomer, test_description):
     divider = ('=' * 10 + '{}' + '=' * 10 + '\n')
     test_log_path = 'tests/test_logs/{}.log'.format(test_description)
-    with open(test_log_path, 'w+') as test_log:
-        test_log.write(divider.format('TEST LOG'))
-        test_log.write(str(datetime.now().time()) + '\n')
-        test_log.write(test_description + '\n')
-        test_log.write('-' * 20 + '\n')
+    time_now = str(datetime.now().time()) + '\n'
+    with open(test_log_path, 'wb+') as test_log:
+        log_header = divider.format('TEST LOG')
+        test_log.write(bytes(log_header, encoding='utf-8'))
+        test_log.write(bytes(time_now, encoding='utf-8'))
+        test_log.write(bytes(test_description, encoding='utf-8'))
+        test_log.write(b'\n')
+        test_log.write(b'-' * 20 + b'\n')
         with open(groomer.logger.log_path, 'rb') as logfile:
             log = logfile.read()
             test_log.write(log)
         if os.path.exists(groomer.logger.log_debug_err):
-            test_log.write(divider.format('ERR LOG'))
+            test_log.write(bytes(divider.format('ERR LOG'), encoding='utf-8'))
             with open(groomer.logger.log_debug_err, 'rb') as debug_err:
                 err = debug_err.read()
                 test_log.write(err)
         if os.path.exists(groomer.logger.log_debug_out):
-            test_log.write(divider.format('OUT LOG'))
+            test_log.write(bytes(divider.format('OUT LOG'), encoding='utf-8'))
             with open(groomer.logger.log_debug_out, 'rb') as debug_out:
                 out = debug_out.read()
                 test_log.write(out)