From dc76e00554011517985990c14a019160cf6ad414 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 8 Feb 2017 12:54:40 -0500
Subject: [PATCH 01/36] Removed broken coveralls link in README

---
 README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 19eb6d3..681a6d2 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,11 @@
 [![Build Status](https://travis-ci.org/CIRCL/PyCIRCLean.svg?branch=master)](https://travis-ci.org/CIRCL/PyCIRCLean)
 [![codecov.io](https://codecov.io/github/CIRCL/PyCIRCLean/coverage.svg?branch=master)](https://codecov.io/github/CIRCL/PyCIRCLean?branch=master)
-[![Coverage Status](https://coveralls.io/repos/github/Rafiot/PyCIRCLean/badge.svg?branch=master)](https://coveralls.io/github/Rafiot/PyCIRCLean?branch=master)
 
 # PyCIRCLean
 
 PyCIRCLean is the core Python code used by [CIRCLean](https://github.com/CIRCL/Circlean/), an open-source
-USB key and document sanitizer created by [CIRCL](https://www.circl.lu/). This module has been separated from the 
-device-specific scripts and can be used for dedicated security applications to sanitize documents from hostile environments 
+USB key and document sanitizer created by [CIRCL](https://www.circl.lu/). This module has been separated from the
+device-specific scripts and can be used for dedicated security applications to sanitize documents from hostile environments
 to trusted environments. PyCIRCLean is currently Python 3.3+ only.
 
 # Installation

From 3cc9aa23f25903b51df156be406f59bdcaabae50 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 8 Feb 2017 13:03:19 -0500
Subject: [PATCH 02/36] Clarify language in README

---
 README.md | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 681a6d2..f0a0cf3 100644
--- a/README.md
+++ b/README.md
@@ -22,8 +22,9 @@ pip install .
 
 # How to use PyCIRCLean
 
-PyCIRCLean is a simple Python library to handle file checking and sanitization. PyCIRCLean is designed as a simple library
-that can be overloaded to cover specific checking and sanitization workflows in different organizations like industrial
+PyCIRCLean is a simple Python library to handle file checking and sanitization.
+PyCIRCLean is designed to be overloaded and extended to cover specific checking
+and sanitization workflows in different organizations such as industrial
 environments or restricted/classified ICT environments. A series of practical examples utilizing PyCIRCLean can be found
 in the [./examples](./examples) directory.
 
@@ -125,9 +126,12 @@ if __name__ == '__main__':
 
 # How to contribute
 
-We welcome contributions (including bug fixes, new code workflows) via pull requests. We are interested in any new workflows
-that can be used to improve security in different organizations. If you see any potential enhancements required to support
-your sanitization workflow, please feel free to open an issue. Read [CONTRIBUTING.md](/CONTRIBUTING.md) for more information.
+We welcome contributions (including bug fixes, new example file processing
+workflows) via pull requests. We are particularly interested in any new workflows
+that can be used to improve security in different organizations. If you see any
+potential enhancements required to support your sanitization workflow, please feel
+free to open an issue. Read [CONTRIBUTING.md](/CONTRIBUTING.md) for more
+information.
 
 
 # License

From 0be926c0eb8bc92f38a585beee830cc0614a3338 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Fri, 10 Feb 2017 16:46:02 -0500
Subject: [PATCH 03/36] Reorganize KittenGroomerBase __init__

---
 kittengroomer/helpers.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 4e82a73..a5f47a2 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -163,21 +163,21 @@ class KittenGroomerBase(object):
         """Initialized with path to source and dest directories."""
         self.src_root_dir = root_src
         self.dst_root_dir = root_dst
+        self.debug = debug
+        self.cur_file = None
+        # Setup logs
         self.log_root_dir = os.path.join(self.dst_root_dir, 'logs')
         self._safe_rmtree(self.log_root_dir)
         self._safe_mkdir(self.log_root_dir)
         self.log_processing = os.path.join(self.log_root_dir, 'processing.log')
         self.log_content = os.path.join(self.log_root_dir, 'content.log')
-        self.tree(self.src_root_dir)
-
         quick_setup(file=self.log_processing)
         self.log_name = log.name('files')
+
+        self.tree(self.src_root_dir)
         self.resources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
         os.environ["PATH"] += os.pathsep + self.resources_path
 
-        self.cur_file = None
-
-        self.debug = debug
         if self.debug:
             self.log_debug_err = os.path.join(self.log_root_dir, 'debug_stderr.log')
             self.log_debug_out = os.path.join(self.log_root_dir, 'debug_stdout.log')

From 774095d95a0c0b64585394d97503092c8e1c3732 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Fri, 10 Feb 2017 19:17:07 -0500
Subject: [PATCH 04/36] Fix list_all_files docstring

---
 kittengroomer/helpers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index a5f47a2..d140ecc 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -283,7 +283,7 @@ class KittenGroomerBase(object):
             return False
 
     def _list_all_files(self, directory):
-        """Generate an iterator over all the files in a directory tree."""
+        """Generator yield path to all of the files in a directory tree."""
         for root, dirs, files in os.walk(directory):
             for filename in files:
                 filepath = os.path.join(root, filename)

From e2af701ac9ab9234119e79baa8bf0139f5da9357 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Fri, 10 Feb 2017 19:27:53 -0500
Subject: [PATCH 05/36] Remove several pieces of unused code

* Remove python 2 KittenGroomerBase.tree
* Remove default source and dest from KittenGroomerFileCheck
* Remove unused sys import
---
 bin/filecheck.py         |  6 +-----
 kittengroomer/helpers.py | 22 ----------------------
 2 files changed, 1 insertion(+), 27 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 8440bd6..9a83b27 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -144,11 +144,7 @@ class File(FileBase):
 
 class KittenGroomerFileCheck(KittenGroomerBase):
 
-    def __init__(self, root_src=None, root_dst=None, max_recursive_depth=2, debug=False):
-        if root_src is None:
-            root_src = os.path.join(os.sep, 'media', 'src')
-        if root_dst is None:
-            root_dst = os.path.join(os.sep, 'media', 'dst')
+    def __init__(self, root_src, root_dst, max_recursive_depth=2, debug=False):
         super(KittenGroomerFileCheck, self).__init__(root_src, root_dst, debug)
         self.recursive_archive_depth = 0
         self.max_recursive_depth = max_recursive_depth
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index d140ecc..3ccbb08 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -9,7 +9,6 @@ desired behavior.
 
 
 import os
-import sys
 import hashlib
 import shutil
 import argparse
@@ -198,27 +197,6 @@ class KittenGroomerBase(object):
 
     def tree(self, base_dir, padding='   '):
         """Writes a graphical tree to the log for a given directory."""
-        if sys.version_info.major == 2:
-            self.__tree_py2(base_dir, padding)
-        else:
-            self.__tree_py3(base_dir, padding)
-
-    def __tree_py2(self, base_dir, padding='   '):
-        with open(self.log_content, 'ab') as lf:
-            lf.write('#' * 80 + '\n')
-            lf.write('{}+- {}/\n'.format(padding, os.path.basename(os.path.abspath(base_dir))))
-            padding += '|  '
-            files = sorted(os.listdir(base_dir))
-            for f in files:
-                curpath = os.path.join(base_dir, f)
-                if os.path.islink(curpath):
-                    lf.write('{}+-- {}\t- Symbolic link to {}\n'.format(padding, f, os.readlink(curpath)))
-                elif os.path.isdir(curpath):
-                    self.tree(curpath, padding)
-                elif os.path.isfile(curpath):
-                    lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)))
-
-    def __tree_py3(self, base_dir, padding='   '):
         with open(self.log_content, 'ab') as lf:
             lf.write(bytes('#' * 80 + '\n', 'UTF-8'))
             lf.write(bytes('{}+- {}/\n'.format(padding, os.path.basename(os.path.abspath(base_dir)).encode()), 'utf8'))

From 92d1b1cd9332ab62d8d6509ccb031c54b81d76ba Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Thu, 16 Feb 2017 17:27:00 -0500
Subject: [PATCH 06/36] Refactor metadata processing code

---
 bin/filecheck.py            | 23 ++++++++++++----------
 kittengroomer/helpers.py    | 38 +++++++++++++++++++------------------
 tests/test_kittengroomer.py | 22 ++++++++++-----------
 3 files changed, 43 insertions(+), 40 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 9a83b27..1c559f8 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -427,7 +427,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
 
     #######################
     # Metadata extractors
-    def _metadata_exif(self, metadata_file):
+    def _metadata_exif(self, metadata_file_path):
         img = open(self.cur_file.src_path, 'rb')
         tags = None
 
@@ -457,19 +457,22 @@ class KittenGroomerFileCheck(KittenGroomerBase):
                         printable = value
                     else:
                         printable = str(value)
-                metadata_file.write("Key: {}\tValue: {}\n".format(tag, printable))
+
+                with open(metadata_file_path, 'w+') as metadata_file:
+                    metadata_file.write("Key: {}\tValue: {}\n".format(tag, printable))
         self.cur_file.add_log_details('metadata', 'exif')
         img.close()
         return True
 
-    def _metadata_png(self, metadataFile):
+    def _metadata_png(self, metadata_file_path):
         warnings.simplefilter('error', Image.DecompressionBombWarning)
         try:
             img = Image.open(self.cur_file.src_path)
             for tag in sorted(img.info.keys()):
                 # These are long and obnoxious/binary
                 if tag not in ('icc_profile'):
-                    metadataFile.write("Key: {}\tValue: {}\n".format(tag, img.info[tag]))
+                    with open(metadata_file_path, 'w+') as metadata_file:
+                        metadata_file.write("Key: {}\tValue: {}\n".format(tag, img.info[tag]))
             self.cur_file.add_log_details('metadata', 'png')
             img.close()
         # Catch decompression bombs
@@ -481,12 +484,12 @@ class KittenGroomerFileCheck(KittenGroomerBase):
             return False
 
     def extract_metadata(self):
-        metadata_file = self._safe_metadata_split(".metadata.txt")
-        success = self.metadata_processing_options.get(self.cur_file.mimetype)(metadata_file)
-        metadata_file.close()
-        if not success:
-            # FIXME Delete empty metadata file
-            pass
+        metadata_file_path = self.cur_file.create_metadata_file(".metadata.txt")
+        # todo: write metadata to file
+        mime = self.cur_file.mimetype
+        metadata_processing_method = self.metadata_processing_options.get(mime)
+        if metadata_processing_method:
+            metadata_processing_method(metadata_file_path)
 
     #######################
     # ##### Media - audio and video aren't converted ######
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 3ccbb08..14cd065 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -154,6 +154,25 @@ class FileBase(object):
             self.log_details['force_ext'] = True
             self.dst_path += ext
 
+    def create_metadata_file(self, ext):
+        """Create a separate file to hold this file's metadata."""
+        try:
+            # make sure we aren't overwriting anything
+            if os.path.exists(self.src_path + ext):
+                raise KittenGroomerError("Cannot create split metadata file for \"" +
+                                         self.dst_path + "\", type '" +
+                                         ext + "': File exists.")
+            else:
+                # TODO: Uncomment these after object relationships are fixed
+                # dst_dir_path, filename = os.path.split(self.dst_path)
+                # self._safe_mkdir(dst_dir_path)
+                # TODO: Check extension for leading "."
+                self.metadata_file_path = self.dst_path + ext
+                return self.metadata_file_path
+        except KittenGroomerError as e:
+            # TODO: Write to log file
+            return False
+
 
 class KittenGroomerBase(object):
     """Base object responsible for copy/sanitization process."""
@@ -243,25 +262,8 @@ class KittenGroomerBase(object):
             print(e)
             return False
 
-    def _safe_metadata_split(self, ext):
-        """Create a separate file to hold this file's metadata."""
-        # TODO: fix logic in this method
-        dst = self.cur_file.dst_path
-        try:
-            if os.path.exists(self.cur_file.src_path + ext):  # should we check dst_path as well?
-                raise KittenGroomerError("Cannot create split metadata file for \"" +
-                                         self.cur_file.dst_path + "\", type '" +
-                                         ext + "': File exists.")
-            dst_path, filename = os.path.split(dst)
-            self._safe_mkdir(dst_path)
-            return open(dst + ext, 'w+')
-        except Exception as e:
-            # TODO: Logfile
-            print(e)
-            return False
-
     def _list_all_files(self, directory):
-        """Generator yield path to all of the files in a directory tree."""
+        """Generator yielding path to all of the files in a directory tree."""
         for root, dirs, files in os.walk(directory):
             for filename in files:
                 filepath = os.path.join(root, filename)
diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py
index 9698a95..ac92002 100644
--- a/tests/test_kittengroomer.py
+++ b/tests/test_kittengroomer.py
@@ -213,6 +213,16 @@ class TestFileBase:
         assert generic_conf_file.log_details.get('force_ext') is None
         # shouldn't change a file's extension if it already is right
 
+    def test_create_metadata_file(self, temp_file):
+        # Try making a metadata file
+        metadata_file_path = temp_file.create_metadata_file('.metadata.txt')
+        with open(metadata_file_path, 'w+') as metadata_file:
+            metadata_file.write('Have some metadata!')
+        # Shouldn't be able to make a metadata file with no extension
+        assert temp_file.create_metadata_file('') is False
+        # if metadata file already exists
+        # if there is no metadata to write should this work?
+
 
 class TestKittenGroomerBase:
 
@@ -258,18 +268,6 @@ class TestKittenGroomerBase:
         assert simple_groomer._safe_copy() is True
         #check that it handles weird file path inputs
 
-    def test_safe_metadata_split(self, tmpdir):
-        file = tmpdir.join('test.txt')
-        file.write('testing')
-        simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath)
-        simple_groomer.cur_file = FileBase(file.strpath, file.strpath)
-        metadata_file = simple_groomer._safe_metadata_split('metadata.log')
-        metadata_file.write('Have some metadata!')
-        metadata_file.close()
-        assert simple_groomer._safe_metadata_split('') is False
-        # if metadata file already exists
-        # if there is no metadata to write should this work?
-
     def test_list_all_files(self, tmpdir):
         file = tmpdir.join('test.txt')
         file.write('testing')

From 1cf8a62f46efbb56c95595fbeb0f53f2918ec774 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 20 Feb 2017 19:03:11 -0500
Subject: [PATCH 07/36] First commit with Logger object

- Made logger object
- Moved some logger related code from Groomer to Logger
- Changed logging related tests
- Filecheck tests still do not pass
---
 bin/filecheck.py            |  1 +
 kittengroomer/__init__.py   |  2 +-
 kittengroomer/helpers.py    | 92 ++++++++++++++++++-------------------
 tests/test_kittengroomer.py | 24 ++++------
 4 files changed, 55 insertions(+), 64 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 1c559f8..2535f41 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -204,6 +204,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
     def _run_process(self, command_string, timeout=None):
         """Run command_string in a subprocess, wait until it finishes."""
         args = shlex.split(command_string)
+        # TODO: log_debug_err and log_debug are now broken, fix
         with open(self.log_debug_err, 'ab') as stderr, open(self.log_debug_out, 'ab') as stdout:
             try:
                 subprocess.check_call(args, stdout=stdout, stderr=stderr, timeout=timeout)
diff --git a/kittengroomer/__init__.py b/kittengroomer/__init__.py
index 39aa699..262988a 100644
--- a/kittengroomer/__init__.py
+++ b/kittengroomer/__init__.py
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-from .helpers import FileBase, KittenGroomerBase, main
+from .helpers import FileBase, KittenGroomerBase, GroomerLog, main
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 14cd065..f009101 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -14,7 +14,7 @@ import shutil
 import argparse
 
 import magic
-from twiggy import quick_setup, log
+import twiggy
 
 
 class KittenGroomerError(Exception):
@@ -38,7 +38,7 @@ class FileBase(object):
     or methods relevant to a given implementation.
     """
 
-    def __init__(self, src_path, dst_path):
+    def __init__(self, src_path, dst_path, logger=None):
         """Initialized with the source path and expected destination path."""
         self.src_path = src_path
         self.dst_path = dst_path
@@ -46,6 +46,7 @@ class FileBase(object):
         self.log_string = ''
         self._determine_extension()
         self._determine_mimetype()
+        self.logger = logger
 
     def _determine_extension(self):
         _, ext = os.path.splitext(self.src_path)
@@ -174,46 +175,25 @@ class FileBase(object):
             return False
 
 
-class KittenGroomerBase(object):
-    """Base object responsible for copy/sanitization process."""
+class GroomerLog(object):
+    """Groomer logging object"""
 
-    def __init__(self, root_src, root_dst, debug=False):
-        """Initialized with path to source and dest directories."""
-        self.src_root_dir = root_src
-        self.dst_root_dir = root_dst
-        self.debug = debug
-        self.cur_file = None
-        # Setup logs
-        self.log_root_dir = os.path.join(self.dst_root_dir, 'logs')
-        self._safe_rmtree(self.log_root_dir)
-        self._safe_mkdir(self.log_root_dir)
-        self.log_processing = os.path.join(self.log_root_dir, 'processing.log')
-        self.log_content = os.path.join(self.log_root_dir, 'content.log')
-        quick_setup(file=self.log_processing)
-        self.log_name = log.name('files')
-
-        self.tree(self.src_root_dir)
-        self.resources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
-        os.environ["PATH"] += os.pathsep + self.resources_path
-
-        if self.debug:
-            self.log_debug_err = os.path.join(self.log_root_dir, 'debug_stderr.log')
-            self.log_debug_out = os.path.join(self.log_root_dir, 'debug_stdout.log')
+    def __init__(self, root_dir, debug=False):
+        self.log_dir_path = os.path.join(root_dir, 'logs')
+        if os.path.exists(self.log_dir_path):
+            shutil.rmtree(self.log_dir_path)
+        os.makedirs(self.log_dir_path)
+        self.log_processing = os.path.join(self.log_dir_path, 'processing.log')
+        self.log_content = os.path.join(self.log_dir_path, 'content.log')
+        twiggy.quick_setup(file=self.log_processing)
+        self.log = twiggy.log.name('files')
+        if debug:
+            self.log_debug_err = os.path.join(self.log_dir_path, 'debug_stderr.log')
+            self.log_debug_out = os.path.join(self.log_dir_path, 'debug_stdout.log')
         else:
             self.log_debug_err = os.devnull
             self.log_debug_out = os.devnull
 
-    def _computehash(self, path):
-        """Returns a sha256 hash of a file at a given path."""
-        s = hashlib.sha256()
-        with open(path, 'rb') as f:
-            while True:
-                buf = f.read(0x100000)
-                if not buf:
-                    break
-                s.update(buf)
-        return s.hexdigest()
-
     def tree(self, base_dir, padding='   '):
         """Writes a graphical tree to the log for a given directory."""
         with open(self.log_content, 'ab') as lf:
@@ -230,6 +210,32 @@ class KittenGroomerBase(object):
                 elif os.path.isfile(curpath):
                     lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)).encode(errors='ignore'))
 
+    def _computehash(self, path):
+        """Returns a sha256 hash of a file at a given path."""
+        s = hashlib.sha256()
+        with open(path, 'rb') as f:
+            while True:
+                buf = f.read(0x100000)
+                if not buf:
+                    break
+                s.update(buf)
+        return s.hexdigest()
+
+
+class KittenGroomerBase(object):
+    """Base object responsible for copy/sanitization process."""
+
+    def __init__(self, root_src, root_dst, debug=False):
+        """Initialized with path to source and dest directories."""
+        self.src_root_dir = root_src
+        self.dst_root_dir = root_dst
+        self.debug = debug
+        self.cur_file = None
+        self.logger = GroomerLog(self.dst_root_dir, debug)
+        # Add data/ to PATH
+        # self.resources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
+        # os.environ["PATH"] += os.pathsep + self.resources_path
+
     # ##### Helpers #####
     def _safe_rmtree(self, directory):
         """Remove a directory tree if it exists."""
@@ -262,6 +268,7 @@ class KittenGroomerBase(object):
             print(e)
             return False
 
+    # TODO: this isn't a private method, change and edit the groomers as well
     def _list_all_files(self, directory):
         """Generator yielding path to all of the files in a directory tree."""
         for root, dirs, files in os.walk(directory):
@@ -269,18 +276,9 @@ class KittenGroomerBase(object):
                 filepath = os.path.join(root, filename)
                 yield filepath
 
-    def _print_log(self):
-        """
-        Print log, should be called after each file.
-
-        You probably want to reimplement it in the subclass.
-        """
-        tmp_log = self.log_name.fields(**self.cur_file.log_details)
-        tmp_log.info('It did a thing.')
-
     #######################
 
-    def processdir(self, src_dir=None, dst_dir=None):
+    def processdir(self, src_dir, dst_dir):
         """
         Implement this function in your subclass to define file processing behavior.
         """
diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py
index ac92002..bd16c5a 100644
--- a/tests/test_kittengroomer.py
+++ b/tests/test_kittengroomer.py
@@ -5,7 +5,7 @@ import os
 
 import pytest
 
-from kittengroomer import FileBase, KittenGroomerBase
+from kittengroomer import FileBase, KittenGroomerBase, GroomerLog
 from kittengroomer.helpers import ImplementationRequired
 
 skip = pytest.mark.skip
@@ -224,6 +224,12 @@ class TestFileBase:
         # if there is no metadata to write should this work?
 
 
+class TestLogger:
+    @xfail
+    def test_tree(self, tmpdir):
+        GroomerLog.tree(tmpdir)
+
+
 class TestKittenGroomerBase:
 
     @fixture
@@ -248,15 +254,6 @@ class TestKittenGroomerBase:
                                           debug=True)
         # we should maybe protect access to self.current_file in some way?
 
-    def test_computehash(self, tmpdir):
-        file = tmpdir.join('test.txt')
-        file.write('testing')
-        simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath)
-        simple_groomer._computehash(file.strpath)
-
-    def test_tree(self, generic_groomer):
-        generic_groomer.tree(generic_groomer.src_root_dir)
-
     def test_safe_copy(self, tmpdir):
         file = tmpdir.join('test.txt')
         file.write('testing')
@@ -278,11 +275,6 @@ class TestKittenGroomerBase:
         assert file.strpath in files
         assert testdir.strpath not in files
 
-    def test_print_log(self, generic_groomer):
-        with pytest.raises(AttributeError):
-            generic_groomer._print_log()
-        # Kind of a bad test, but this should be implemented by the user anyway
-
     def test_processdir(self, generic_groomer):
         with pytest.raises(ImplementationRequired):
-            generic_groomer.processdir()
+            generic_groomer.processdir(None, None)

From 7d62238270fa53f2a76a5515dbccc90de2de8bd0 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 20 Feb 2017 22:38:41 -0500
Subject: [PATCH 08/36] Hacks to make tests pass before fixing

---
 bin/filecheck.py         | 14 ++++++++------
 kittengroomer/helpers.py | 10 ++++++++++
 tests/logging.py         | 10 +++++-----
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 2535f41..133ef17 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -82,8 +82,8 @@ MAL_EXTS = (
 
 class File(FileBase):
 
-    def __init__(self, src_path, dst_path):
-        super(File, self).__init__(src_path, dst_path)
+    def __init__(self, src_path, dst_path, logger):
+        super(File, self).__init__(src_path, dst_path, logger)
         self.is_recursive = False
         self._check_dangerous()
         if self.is_dangerous():
@@ -148,6 +148,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         super(KittenGroomerFileCheck, self).__init__(root_src, root_dst, debug)
         self.recursive_archive_depth = 0
         self.max_recursive_depth = max_recursive_depth
+        self.log_name = self.logger.log
 
         subtypes_apps = [
             (mimes_office, self._winoffice),
@@ -193,7 +194,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
     def _print_log(self):
         """Print the logs related to the current file being processed."""
         # TODO: change name to _write_log
-        tmp_log = self.log_name.fields(**self.cur_file.log_details)
+        tmp_log = self.logger.log.fields(**self.cur_file.log_details)
         if self.cur_file.is_dangerous():
             tmp_log.warning(self.cur_file.log_string)
         elif self.cur_file.log_details.get('unknown') or self.cur_file.log_details.get('binary'):
@@ -205,7 +206,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         """Run command_string in a subprocess, wait until it finishes."""
         args = shlex.split(command_string)
         # TODO: log_debug_err and log_debug are now broken, fix
-        with open(self.log_debug_err, 'ab') as stderr, open(self.log_debug_out, 'ab') as stdout:
+        with open(self.logger.log_debug_err, 'ab') as stderr, open(self.logger.log_debug_out, 'ab') as stdout:
             try:
                 subprocess.check_call(args, stdout=stdout, stderr=stderr, timeout=timeout)
             except (subprocess.TimeoutExpired, subprocess.CalledProcessError):
@@ -400,7 +401,8 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         extract_command = '{} -p1 x "{}" -o"{}" -bd -aoa'.format(SEVENZ_PATH, self.cur_file.src_path, tmpdir)
         self._run_process(extract_command)
         self.recursive_archive_depth += 1
-        self.tree(tmpdir)
+        # Broken so commenting out for now:
+        # self.tree(tmpdir)
         self.processdir(tmpdir, self.cur_file.dst_path)
         self.recursive_archive_depth -= 1
         self._safe_rmtree(tmpdir)
@@ -549,7 +551,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
     #######################
 
     def process_file(self, srcpath, dstpath, relative_path):
-        self.cur_file = File(srcpath, dstpath)
+        self.cur_file = File(srcpath, dstpath, self.logger)
         self.log_name.info('Processing {} ({}/{})',
                            relative_path,
                            self.cur_file.main_type,
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index f009101..1e24140 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -42,6 +42,7 @@ class FileBase(object):
         """Initialized with the source path and expected destination path."""
         self.src_path = src_path
         self.dst_path = dst_path
+        # TODO: rename this. file_information? file_data? file_metadata? file_log?
         self.log_details = {'filepath': self.src_path}
         self.log_string = ''
         self._determine_extension()
@@ -221,6 +222,13 @@ class GroomerLog(object):
                 s.update(buf)
         return s.hexdigest()
 
+    def add_file(self):
+        pass
+        # Should this return a sublog that the file can then write to? Does that work? Too confusing to understand?
+
+    def add_event(self, event, log_level):
+        pass
+
 
 class KittenGroomerBase(object):
     """Base object responsible for copy/sanitization process."""
@@ -278,6 +286,7 @@ class KittenGroomerBase(object):
 
     #######################
 
+    # TODO: feels like this funciton doesn't need to exist if we move main()
     def processdir(self, src_dir, dst_dir):
         """
         Implement this function in your subclass to define file processing behavior.
@@ -285,6 +294,7 @@ class KittenGroomerBase(object):
         raise ImplementationRequired('Please implement processdir.')
 
 
+# TODO: Maybe this shouldn't exist? It should probably get moved to filecheck since this isn't really API code
 def main(kg_implementation, description='Call a KittenGroomer implementation to process files present in the source directory and copy them to the destination directory.'):
     parser = argparse.ArgumentParser(prog='KittenGroomer', description=description)
     parser.add_argument('-s', '--source', type=str, help='Source directory')
diff --git a/tests/logging.py b/tests/logging.py
index e625137..c937a71 100644
--- a/tests/logging.py
+++ b/tests/logging.py
@@ -6,17 +6,17 @@ def save_logs(groomer, test_description):
     test_log_path = 'tests/test_logs/{}.log'.format(test_description)
     with open(test_log_path, 'w+') as test_log:
         test_log.write(divider.format('TEST LOG'))
-        with open(groomer.log_processing, 'r') as logfile:
+        with open(groomer.logger.log_processing, 'r') as logfile:
             log = logfile.read()
             test_log.write(log)
         if groomer.debug:
-            if os.path.exists(groomer.log_debug_err):
+            if os.path.exists(groomer.logger.log_debug_err):
                 test_log.write(divider.format('ERR LOG'))
-                with open(groomer.log_debug_err, 'r') as debug_err:
+                with open(groomer.logger.log_debug_err, 'r') as debug_err:
                     err = debug_err.read()
                     test_log.write(err)
-            if os.path.exists(groomer.log_debug_out):
+            if os.path.exists(groomer.logger.log_debug_out):
                 test_log.write(divider.format('OUT LOG'))
-                with open(groomer.log_debug_out, 'r') as debug_out:
+                with open(groomer.logger.log_debug_out, 'r') as debug_out:
                     out = debug_out.read()
                     test_log.write(out)

From a450fe6b96a557ef344883f3eba1ff8e2feb9556 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 22 Feb 2017 11:29:20 -0500
Subject: [PATCH 09/36] Add config object to filecheck

- Grouped all configuration options for filecheck into a Config object
- Makes the code easier to read since no longer many references to different
configuration globals
---
 bin/filecheck.py | 188 +++++++++++++++++++++++------------------------
 1 file changed, 94 insertions(+), 94 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 133ef17..3b1a3a4 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -9,75 +9,75 @@ import zipfile
 import oletools.oleid
 import olefile
 import officedissector
-
 import warnings
 import exifread
 from PIL import Image
 # from PIL import PngImagePlugin
-
 from pdfid import PDFiD, cPDFiD
 
 from kittengroomer import FileBase, KittenGroomerBase, main
 
+
 SEVENZ_PATH = '/usr/bin/7z'
 
 
-# Prepare application/<subtype>
-mimes_ooxml = ['vnd.openxmlformats-officedocument.']
-mimes_office = ['msword', 'vnd.ms-']
-mimes_libreoffice = ['vnd.oasis.opendocument']
-mimes_rtf = ['rtf', 'richtext']
-mimes_pdf = ['pdf', 'postscript']
-mimes_xml = ['xml']
-mimes_ms = ['dosexec']
-mimes_compressed = ['zip', 'rar', 'bzip2', 'lzip', 'lzma', 'lzop',
-                    'xz', 'compress', 'gzip', 'tar']
-mimes_data = ['octet-stream']
+class Config:
+    # Application subtypes (mimetype: 'application/<subtype>')
+    mimes_ooxml = ['vnd.openxmlformats-officedocument.']
+    mimes_office = ['msword', 'vnd.ms-']
+    mimes_libreoffice = ['vnd.oasis.opendocument']
+    mimes_rtf = ['rtf', 'richtext']
+    mimes_pdf = ['pdf', 'postscript']
+    mimes_xml = ['xml']
+    mimes_ms = ['dosexec']
+    mimes_compressed = ['zip', 'rar', 'bzip2', 'lzip', 'lzma', 'lzop',
+                        'xz', 'compress', 'gzip', 'tar']
+    mimes_data = ['octet-stream']
 
-# Prepare image/<subtype>
-mimes_exif = ['image/jpeg', 'image/tiff']
-mimes_png = ['image/png']
+    # Image subtypes
+    mimes_exif = ['image/jpeg', 'image/tiff']
+    mimes_png = ['image/png']
 
-# Mimetypes we can pull metadata from
-mimes_metadata = ['image/jpeg', 'image/tiff', 'image/png']
+    # Mimetypes with metadata
+    mimes_metadata = ['image/jpeg', 'image/tiff', 'image/png']
 
-# Aliases
-aliases = {
-    # Win executables
-    'application/x-msdos-program': 'application/x-dosexec',
-    'application/x-dosexec': 'application/x-msdos-program',
-    # Other apps with confusing mimetypes
-    'application/rtf': 'text/rtf',
-}
+    # Commonly used malicious extensions
+    # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/
+    # https://github.com/wiregit/wirecode/blob/master/components/core-settings/src/main/java/org/limewire/core/settings/FilterSettings.java
+    malicious_exts = (
+        # Applications
+        ".exe", ".pif", ".application", ".gadget", ".msi", ".msp", ".com", ".scr",
+        ".hta", ".cpl", ".msc", ".jar",
+        # Scripts
+        ".bat", ".cmd", ".vb", ".vbs", ".vbe", ".js", ".jse", ".ws", ".wsf",
+        ".wsc", ".wsh", ".ps1", ".ps1xml", ".ps2", ".ps2xml", ".psc1", ".psc2",
+        ".msh", ".msh1", ".msh2", ".mshxml", ".msh1xml", ".msh2xml",
+        # Shortcuts
+        ".scf", ".lnk", ".inf",
+        # Other
+        ".reg", ".dll",
+        # Office macro (OOXML with macro enabled)
+        ".docm", ".dotm", ".xlsm", ".xltm", ".xlam", ".pptm", ".potm", ".ppam",
+        ".ppsm", ".sldm",
+        # banned from wirecode
+        ".asf", ".asx", ".au", ".htm", ".html", ".mht", ".vbs",
+        ".wax", ".wm", ".wma", ".wmd", ".wmv", ".wmx", ".wmz", ".wvx",
+    )
 
-# Sometimes, mimetypes.guess_type is giving unexpected results, such as for the .tar.gz files:
-# In [12]: mimetypes.guess_type('toot.tar.gz', strict=False)
-# Out[12]: ('application/x-tar', 'gzip')
-# It works as expected if you do mimetypes.guess_type('application/gzip', strict=False)
-propertype = {'.gz': 'application/gzip'}
+    # Aliases
+    aliases = {
+        # Win executables
+        'application/x-msdos-program': 'application/x-dosexec',
+        'application/x-dosexec': 'application/x-msdos-program',
+        # Other apps with confusing mimetypes
+        'application/rtf': 'text/rtf',
+    }
 
-# Commonly used malicious extensions
-# Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/
-# https://github.com/wiregit/wirecode/blob/master/components/core-settings/src/main/java/org/limewire/core/settings/FilterSettings.java
-MAL_EXTS = (
-    # Applications
-    ".exe", ".pif", ".application", ".gadget", ".msi", ".msp", ".com", ".scr",
-    ".hta", ".cpl", ".msc", ".jar",
-    # Scripts
-    ".bat", ".cmd", ".vb", ".vbs", ".vbe", ".js", ".jse", ".ws", ".wsf",
-    ".wsc", ".wsh", ".ps1", ".ps1xml", ".ps2", ".ps2xml", ".psc1", ".psc2",
-    ".msh", ".msh1", ".msh2", ".mshxml", ".msh1xml", ".msh2xml",
-    # Shortcuts
-    ".scf", ".lnk", ".inf",
-    # Other
-    ".reg", ".dll",
-    # Office macro (OOXML with macro enabled)
-    ".docm", ".dotm", ".xlsm", ".xltm", ".xlam", ".pptm", ".potm", ".ppam",
-    ".ppsm", ".sldm",
-    # banned from wirecode
-    ".asf", ".asx", ".au", ".htm", ".html", ".mht", ".vbs",
-    ".wax", ".wm", ".wma", ".wmd", ".wmv", ".wmx", ".wmz", ".wvx",
-)
+    # Sometimes, mimetypes.guess_type gives unexpected results, such as for .tar.gz files:
+    # In [12]: mimetypes.guess_type('toot.tar.gz', strict=False)
+    # Out[12]: ('application/x-tar', 'gzip')
+    # It works as expected if you do mimetypes.guess_type('application/gzip', strict=False)
+    override_ext = {'.gz': 'application/gzip'}
 
 
 class File(FileBase):
@@ -101,7 +101,7 @@ class File(FileBase):
             self.make_dangerous()
         if not self.has_extension():
             self.make_dangerous()
-        if self.extension in MAL_EXTS:
+        if self.extension in Config.malicious_exts:
             self.log_details.update({'malicious_extension': self.extension})
             self.make_dangerous()
 
@@ -111,12 +111,12 @@ class File(FileBase):
         module's list of valid mimetypes and the expected mimetype based on its
         extension differs from the mimetype determined by libmagic, then it
         marks the file as dangerous."""
-        if propertype.get(self.extension) is not None:
-            expected_mimetype = propertype.get(self.extension)
+        if self.extension in Config.override_ext:
+            expected_mimetype = Config.override_ext[self.extension]
         else:
             expected_mimetype, encoding = mimetypes.guess_type(self.src_path, strict=False)
-            if aliases.get(expected_mimetype) is not None:
-                expected_mimetype = aliases.get(expected_mimetype)
+            if expected_mimetype in Config.aliases:
+                expected_mimetype = Config.aliases[expected_mimetype]
         is_known_extension = self.extension in mimetypes.types_map.keys()
         if is_known_extension and expected_mimetype != self.mimetype:
             self.log_details.update({'expected_mimetype': expected_mimetype})
@@ -126,8 +126,8 @@ class File(FileBase):
         """Takes the mimetype (as determined by libmagic) and determines
         whether the list of extensions that are normally associated with
         that extension contains the file's actual extension."""
-        if aliases.get(self.mimetype) is not None:
-            mimetype = aliases.get(self.mimetype)
+        if self.mimetype in Config.aliases:
+            mimetype = Config.aliases[self.mimetype]
         else:
             mimetype = self.mimetype
         expected_extensions = mimetypes.guess_all_extensions(mimetype, strict=False)
@@ -137,7 +137,7 @@ class File(FileBase):
                 self.make_dangerous()
 
     def has_metadata(self):
-        if self.mimetype in mimes_metadata:
+        if self.mimetype in Config.mimes_metadata:
             return True
         return False
 
@@ -151,23 +151,23 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         self.log_name = self.logger.log
 
         subtypes_apps = [
-            (mimes_office, self._winoffice),
-            (mimes_ooxml, self._ooxml),
-            (mimes_rtf, self.text),
-            (mimes_libreoffice, self._libreoffice),
-            (mimes_pdf, self._pdf),
-            (mimes_xml, self.text),
-            (mimes_ms, self._executables),
-            (mimes_compressed, self._archive),
-            (mimes_data, self._binary_app),
+            (Config.mimes_office, self._winoffice),
+            (Config.mimes_ooxml, self._ooxml),
+            (Config.mimes_rtf, self.text),
+            (Config.mimes_libreoffice, self._libreoffice),
+            (Config.mimes_pdf, self._pdf),
+            (Config.mimes_xml, self.text),
+            (Config.mimes_ms, self._executables),
+            (Config.mimes_compressed, self._archive),
+            (Config.mimes_data, self._binary_app),
         ]
-        self.subtypes_application = self._init_subtypes_application(subtypes_apps)
+        self.app_subtype_methods = self._make_method_dict(subtypes_apps)
 
         types_metadata = [
-            (mimes_exif, self._metadata_exif),
-            (mimes_png, self._metadata_png),
+            (Config.mimes_exif, self._metadata_exif),
+            (Config.mimes_png, self._metadata_png),
         ]
-        self.metadata_processing_options = self._init_subtypes_application(types_metadata)
+        self.metadata_mimetype_methods = self._make_method_dict(types_metadata)
 
         self.mime_processing_options = {
             'text': self.text,
@@ -183,17 +183,17 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         }
 
     # ##### Helper functions #####
-    def _init_subtypes_application(self, subtypes_application):
-        """Creates a dictionary with the right method based on the sub mime type."""
-        subtype_dict = {}
-        for list_subtypes, func in subtypes_application:
-            for st in list_subtypes:
-                subtype_dict[st] = func
-        return subtype_dict
+    def _make_method_dict(self, list_of_tuples):
+        """Returns a dictionary with mimetype: method pairs."""
+        dict_to_return = {}
+        for list_of_subtypes, method in list_of_tuples:
+            for subtype in list_of_subtypes:
+                dict_to_return[subtype] = method
+        return dict_to_return
 
     def _print_log(self):
         """Print the logs related to the current file being processed."""
-        # TODO: change name to _write_log
+        # TODO: change name to _write_log, move to helpers
         tmp_log = self.logger.log.fields(**self.cur_file.log_details)
         if self.cur_file.is_dangerous():
             tmp_log.warning(self.cur_file.log_string)
@@ -205,7 +205,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
     def _run_process(self, command_string, timeout=None):
         """Run command_string in a subprocess, wait until it finishes."""
         args = shlex.split(command_string)
-        # TODO: log_debug_err and log_debug are now broken, fix
+        # TODO: log_debug_err and log_debug are now broken, fix, move to helpers
         with open(self.logger.log_debug_err, 'ab') as stderr, open(self.logger.log_debug_out, 'ab') as stdout:
             try:
                 subprocess.check_call(args, stdout=stdout, stderr=stderr, timeout=timeout)
@@ -250,15 +250,15 @@ class KittenGroomerFileCheck(KittenGroomerBase):
     # ##### Files that will be converted ######
     def text(self):
         """Process an rtf, ooxml, or plaintext file."""
-        for r in mimes_rtf:
-            if r in self.cur_file.sub_type:
+        for mt in Config.mimes_rtf:
+            if mt in self.cur_file.sub_type:
                 self.cur_file.log_string += 'Rich Text file'
                 # TODO: need a way to convert it to plain text
                 self.cur_file.force_ext('.txt')
                 self._safe_copy()
                 return
-        for o in mimes_ooxml:
-            if o in self.cur_file.sub_type:
+        for mt in Config.mimes_ooxml:
+            if mt in self.cur_file.sub_type:
                 self.cur_file.log_string += 'OOXML File'
                 self._ooxml()
                 return
@@ -268,9 +268,9 @@ class KittenGroomerFileCheck(KittenGroomerBase):
 
     def application(self):
         """Processes an application specific file according to its subtype."""
-        for subtype, fct in self.subtypes_application.items():
+        for subtype, method in self.app_subtype_methods.items():
             if subtype in self.cur_file.sub_type:
-                fct()
+                method()
                 self.cur_file.log_string += 'Application file'
                 return
         self.cur_file.log_string += 'Unknown Application file'
@@ -401,8 +401,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         extract_command = '{} -p1 x "{}" -o"{}" -bd -aoa'.format(SEVENZ_PATH, self.cur_file.src_path, tmpdir)
         self._run_process(extract_command)
         self.recursive_archive_depth += 1
-        # Broken so commenting out for now:
-        # self.tree(tmpdir)
+        self.logger.tree(tmpdir)
         self.processdir(tmpdir, self.cur_file.dst_path)
         self.recursive_archive_depth -= 1
         self._safe_rmtree(tmpdir)
@@ -488,10 +487,10 @@ class KittenGroomerFileCheck(KittenGroomerBase):
 
     def extract_metadata(self):
         metadata_file_path = self.cur_file.create_metadata_file(".metadata.txt")
-        # todo: write metadata to file
-        mime = self.cur_file.mimetype
-        metadata_processing_method = self.metadata_processing_options.get(mime)
+        mt = self.cur_file.mimetype
+        metadata_processing_method = self.metadata_mimetype_methods.get(mt)
         if metadata_processing_method:
+            # TODO: should we return metadata and write it here instead of in processing method?
             metadata_processing_method(metadata_file_path)
 
     #######################
@@ -565,6 +564,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
 
     def processdir(self, src_dir=None, dst_dir=None):
         """Main function coordinating file processing."""
+        # TODO: do we need defaults here?
         if src_dir is None:
             src_dir = self.src_root_dir
         if dst_dir is None:

From 9d2f5c72412b0f6c0639ccb3c0ad7174f5983ef4 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 22 Feb 2017 16:06:51 -0500
Subject: [PATCH 10/36] Add .jpg test file, testfile_directory.md

- Also fixed a bug with metadata code
---
 kittengroomer/helpers.py    |   6 +++---
 tests/src_valid/Example.jpg | Bin 0 -> 27661 bytes
 tests/testfile_catalog.md   |  12 ++++++++++++
 3 files changed, 15 insertions(+), 3 deletions(-)
 create mode 100644 tests/src_valid/Example.jpg
 create mode 100644 tests/testfile_catalog.md

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 1e24140..3455bbd 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -165,9 +165,9 @@ class FileBase(object):
                                          self.dst_path + "\", type '" +
                                          ext + "': File exists.")
             else:
-                # TODO: Uncomment these after object relationships are fixed
-                # dst_dir_path, filename = os.path.split(self.dst_path)
-                # self._safe_mkdir(dst_dir_path)
+                dst_dir_path, filename = os.path.split(self.dst_path)
+                if not os.path.exists(dst_dir_path):
+                    os.makedirs(dst_dir_path)
                 # TODO: Check extension for leading "."
                 self.metadata_file_path = self.dst_path + ext
                 return self.metadata_file_path
diff --git a/tests/src_valid/Example.jpg b/tests/src_valid/Example.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a686d104cd5fcd90f06c6f94c2166c7aea00c11e
GIT binary patch
literal 27661
zcmcfobx<7L7C4FyF2OCh1lI%t1O|6z@L?bU0ts%x9Rk5gfS@x#aG3!HO>ieTf#8En
zaCZ%wn^V84?|iT7y!Yzfdi8c!_aCdf_mbXgt-YlG%=}pfkbzY}ssIcO41oII1Mp`B
zK%?SoZvy~mX#w~E0092q^Y8fc0C=qAZf)*qZN=(i?`g*hwX<eb18M30nFlBVaImnk
zv9WOeUT|=5aPf%n@%|3dM~?`I$Vn+E$Vth`DQOrVQ&Q1Wlatf3(b6+9v9PdEK7PXS
zgqeepnT7d3lVJRP6%QAW1RtM-nTni>`TzF#^Bq8rkHL#UiG}e5fJu&lMUL^O8^8zv
zU}F7c599yv@%M_02f)F{#=`v1T}v_m1{M}378VXJ89pIC76AYQ6AK#$mmENWN699v
zpodRI{luJI<b_*EBF*FH;szd`USY7jZ&lSbV+5j#5O;XTIER?N;Y*9qr2Gk5P6?&t
z0`v+U*D<%UrIodJIHKz>5{AFX|3kw+6aWBN*ch091@-r)FgXAd69XFq0~ZGy>;F9R
zmjrTbioe)#a4DaN=!H<3yCrt8e_J`mqs|-qGY260`y3`Y7CAr;Fe%<jA9qlz;WY=v
zX=r$d0z*A1@^wW(efp(O74&4gCfaG#4ySvHE=tvJBXUORwfvOtp9?s%EKDyxj-JHM
zXZYkVCI+?RbE=WJ(PdaN=4azI!{A7cYr+`Y=Hs66_gU?~pUV!-4~!}b5{O?!W^Jgn
zl?T!N5{piy;+yOo`lbst9Fi3fQ+qFhWLuZ$r1*+Bkl2fBJis#WW?|U=S|%kkZ~)wM
zXH3)0S8rqqHNYOww*%L=AYCLqc!ivJ`9{hDJh8$?p*mmBk9rbsYh`ehZ%MW9j98i#
zUm_tnk7K1}3)N8zdYjrRnmsYZ=tAsl6CVDvnk2$#1)LVF2+B;y=z?B82tOQbN-@?_
zVGc?vPTm;XL3u^KQD>N2Kq64~{r&0^qj+ZjaDn<SYdb|?&E5C*#rB)YY>1|mrFVJV
zH~?2fHcEFyH-IEqa4;_F@y9zIX!PO-DrNZ~#G=AzF<Hi$;^sCA8x|T2-IO0_ex6I*
z`VZCm|FBX+>~P}E*(a2>Db9JhD2}MXuYHN))igi`U34lAjL%||AvuW1Jb#Q*Km0Gs
z0Md2^y{VKFhM)Ho3N-`C=SrbF(rF=%Qo1xg@H#4WKsf=E|C-?)Xf?3i{3Q>SLpQMI
zi@^zpf+l{;DY6Piu{7#K&C>uw0Q@guAz1*u|1$mG@0pi@^+`0Qsadc7p7M3>)Xo9h
z6>9QhUMg&0m||BP1%f=EU_F;<;xC6T>!B!iCyktx!ieEAY**S8a}*-bAT5lI|1~Z0
zUhZ?<*(quu$JrF^pry4gj9U#so|;S7-s?;)K!e*dz~c!X`94q6m4nzH(F9PkqD{kX
zI)fC+(#}kDCb2XKIm($zz@h3)s%oYf9{&qP-mS(*XiVWu=>qyAwGWDF>x~iw`)#E<
zuEIh?KR7xQ#EO)`(4z(HmVEAMf|jOaDU=ysIeop?>ztIi8IlqB#yd*bXm4y-Udl>A
z(TZ5>D-0Gpr>5MOkvg#oI?(w?cSgN-deVt2gI-w4s5kI=_3eB@sm5zXrVefkmA8Jn
z2q9#k%7<bqs<n-18GK*MYSgpdJ|A>G$}){To%<qhlScGA0~2I|;>QP*tgo0Hm#s*$
zF5^DUEzU9D`4z){<={{F>5@6?ca@xX<c_pP-ENCeu^S0~=7vZXjpipebiZ;ToQY4U
z2Eh}|$e`XKvA-9V^+@$JxF!p!SSb|ZqgJ=!-dS2Peu5M66zkC|BV7W$8AA%a1~RI@
zYW3ouwfHZ!(r9~Puq)svf1Hv<SR`2DrBE`CRI|R|{369JS~xIHGI_uO+maYoEf;d;
zhw#2&9Q03hd3~j&nhh(-T_gbnKD#193h{!$KIGGOoUaZ?ahd<u>RtbKSR%OP)K@ub
zh1zzZW{t?r>&*o5nm8)#!V#hPavAhYt$eWGE#@m1A}ZRe@gnYgHUh7lk<U0zXBFOL
zArhbve8FW#p3F6Qnwx(_RP>MB>?o>xjTs*;HCuPp9hOYG$Mox;pc2Tggh^^r701Ef
z3DhA9OAzo!0>bG>(*Cu!dMc1G`DVwJ6!eBdwAa&sn#B?b^a}=9U9m&4RUTSKw1!QA
z#r?nJWx(57d@2#tbEIyQXI%KhyuRRtc61R!no(Ds;GI`L-!6&x@)j)+RU?rHt~BMX
z-5eE?a`?LM1&X1mB@p<cg$S+{MF`p(F2u&J+e#da;`~?p{$+=V8hq<j3QhIPo1EG`
zUsv;@6i_8GI%kn9)WknDqWQF;YNn`#z%M%$r3}$^pMTjI9&S^-`J`N+nMkAASWJvm
zfFZ$N5*+(%^06ysym<0d|4V50A9U87TJB2Ri{?|k=#%yRh!o1usP}0)f*<rR>Zw_u
z7LL~-7<ck=pTB%OWATEVFILsM&RB=rfk7RtYNW7@Ti7Zwa-3>-N4l$r%4M0#nJQ;L
zr1>CkZ@cm355Oj->p-vF!B%rnXU{P4>XBTom~AeW#KAFJ(1%;+g4^fmkdx2&Y6;JI
z6%`>F&T$aHerJKBs<Y0=>x&oM-S2NOlzLchHI5w*431`v$FDxZ_k<?POySJTZd|QS
zi+f+#whlUQ+}Y8#p%Cvrw|tNn2rF&wTPUe_E$;g7_#A!r_i7`#{7$?El2heRoSbPR
zpouuCHoZ)-ErlI!Sh9p1@6&Zmm}72S{S^G&f$QTfQ}C^yd9@NnwC4_up9ply^7Ru#
z&{y_bjk4@p=!G2evu&adJ5#c4pTGha*RYSZ8Ex<<`DoDP`ETTjc1T$3=xK&VY-BKi
z&1#F)$5F*BiEng&7u`hU##PZHbHys6>KBi!rJ0DkE5;}_{c*w~TlgP3<;}Hur|rbz
zxo1U2$ezwepLt8$@}aK^)s@wsdYC-*C<sRuj>2zHc(p3DBhEFp)MRi!jblr9#kzVk
zcJ{B=rKX<0{zKgEe<XK~84)O5J=gbvWID&0kN1A-We_A~((Gv2Ys6*LM8B4fe6-FO
z)S0!>6!pkbW*1DT)>p{<f28#xQ#KGIc`vEKz}*C#zwq91HKtJBB}XA4oX*~jkB~S;
zpVWK3oV~hG%^A4eH6Wh$2k_XLM#8}1y4s5#OstABdZkA-bbu06!{J~$vYle;r|UQz
zWk_C?4b=J%v%yM|frjw)AEr0>`8V{(wQF1%LUCPE)7oCOY$P<J9*xrF!>kwB7;_5a
zoqNTzXBEt}&33<1dscT^dBGV^kh%%`>en6s-bVyZq&&&t2rG`KU%mG~|5uJY6yZ67
z+%yov<gS{xYsEt<#&;`HcFYiIhgXV<F))QpuZJhvng1eaXsTTs+fRwZo2%V3SQ?LG
zxAi@^&c;u$b7kMo!?byqrdc!%M%C$&y;d5RGP)aRimXo9Hgk)+JD>X4w*9F|!2hCp
zE0i0#Q^k7SJ9Jm!2v)ssWcb!B(LXGu+O|KS=rkxB<Kj08-);&ZyS^3o@Y6yv)UB^c
zdBkSg)@)<n+hlNk?tRKVCq0c-l{@Y*mi<a{p+>=+Px3Osll~+0miy!5pN>bLUX;hC
zBZE{7Gw*QfK2pB&<tQTIjgpz9ok74!K563x)vEq?z0+iCCpgi1XPNGax0+TeW*G-r
zeY&uTX7pDaopn>c;<{ozmkL$q<KGP^l_>ZFun!^*ENQ*vH+K1#-YIm3b3Y_WJ9yar
z`k5EIRw3P2{YF-kSUg1{9m}4fA?g+3&io#$eb>MABF|=;|6J}#2B9glmtq$AT)O2{
z$ur|k^)PK{eb<QiGgnoL=V8Gfl>^w#7CNxLM=$M6UpkFpJ=Bsy`7Y=v3gBsA={k)D
zhG0PD+de4Pbneu@zLWgS;|RP6pHwoZy+b853T~3F<&bu>+OX<Co&y%=%n#4nQviYF
z_p&?Ya|g8}UbVG+Y3R_%)8|k|L7x(aN3sfF1GD0v?WVDR<;Umgv+=jz@tS8;$ua#R
zw6vP+Y5XoB?lrd^(E#)vE2m05;kbW-!5+@r<i!cYJCSPp6sE#S+$=8T>LGVcl)E9-
zdC`KNeq34HsZKwD9t$|l=~}X02UltMJ>BzIGx(Qm7#qIG_$ri&L7<TN;D@oAt!CVU
z*VH#OdX%m?_78x-%jNq6l0H4t_P0i95Z(n3dX#kU9}B71%Mz;5cSLkFJ6#&^zo5Ai
z%dX9l4k1k7l??>c7{oDFaIr0io~|FSj(oGdJN8f*e?LSu@u6p|4W08->?jxO0iG%H
z2cTvce<Awe&>@gt!ulbuoMqSU4`5*|vM`wR#?W=}p2=7aJ@yCCh&nG@NxcEqd_etO
z93HClkQ0x%soFT?MBX33v_UTT{T#4Wb@HL9B`}P;kA4cGXJljfm^+-RGUD#36OQA`
zMlK9tRXkP@jCY%E^y>v)vJ+~^dQW?#c+jQxqp005<8Nvwg6mJRLmWa3@i8rCD4LjY
zhQj1=Q45pUkb}N9*>R{^Tx>(&`YrJov$Z>T1G(-Y)4w7!gPq;#aV<tw?vn2XF%7L}
zp*|oO=8bt7>aFSO&*^Tr-%`CSkFU-`LlVh?SiEAxX=aY@ax|o1HyL|{HA9s${z0g=
zwX|!1nv0(?9cnisu5O0LYNw+c(GMH_z5OL!^HpMshBP|Q!HXAvASwuOBugLf5m@b+
zv_spO)5)KxDY16BE*k7wy?N3*F?!K2E0<w-`S~+0)9mX)1G_O3ua9HY1NvH7$k&H$
zCGvYAi!0?ic80tLvlrg1U9iDKj#K1yCuSg;g9v*)`#W$AuUR9#nn-<31r2*fR);!Y
zZ$vW<{LEyrEv+xA*0)<Na8IPk*EU$j_3x;i!Z0BGl0&Wh?ypbH7ue-^vwC^ouqih%
zUhwoh1876<$fwpKIX#j;Cj<7Hxr!N(oTnuvOjWUDyu}<ylDdX*Vf+C^u)oyJSm_5q
zB@Esb^l0j{m!49Qj+AcbGxU|l!yL12oC6`BqZ&d+_lOOIR&CczpI2lkFaJEHh*2E3
zw!J1Dn|XJ)L3<rnsoxdE$jb)$TCTzo8_G+5{9$>N@|f^7kK4B#M#rhlk}OFT=#L|{
zqxnq4o&CO?cuh(Q>4uHLxM}Xzj$>EO8SdmmvE^-gj)ocIA3&OT<%JT<!lVK4CWj1r
zYG+~Hr<K-R-WQhrD<M8cl?ps5RLhd1l{UX~q`v)kJ{i@L;V)e&4P2iSQ*pS*r=#RH
ziA(Qw`>w=fQ(7ed0G>I$*^QKI%x~3Fb|wrI^!0dnJJ&u*^ua^_-?;hFqnQ6M_|Z4U
z$6I$DwI51oA2y>eL;*$rpQZ-zp4YC7T!%UQ0YHWChy1ix>PaRDF1R{tLx_t*d5JuC
z?uls~dq14eu{gZk1Y%8u{lZT1S)!GzE>JP1UcTz!%({vs-QONF3?5qe#|WU;<T_1F
zj^$9Qg~1<;KN#1<HyGVxl*B!|qcD^6wGA3>HJSTgP=K1&Uq7sR3;FPDx~+w`eRcbn
zgU%-X)hMMIH9;cpads9^Nv}X8wBFQBfmcz@kNb_7(J=}-OU)OXlHVOW>$a6?ND&@4
zuKn$b$~XSy>ixJ=sc*{=Ms1vlJvN^L-mk`$wSDGA6{qIMn;HgYCl4xpbiNxV&D@d>
zt`|?M33}X91qs+@nOS|1GObcNeoU&3zSiF-X(sd<mPB#_B;-3ZY@2<~ny&04I-MEx
z%X=qUYw4pg!HW<Ps(9?JueP!acY#`!xQ@q8JNz{k%HFx|UsI>_bC?Qf7Xqq7vKMx%
zoLUB}x;j@`?=y%Ek?n*sqZahVZoLIhC_t~nw^%s=UJ_U`U4SIu?rqd#QHJy!v2E8$
zydkbey8dA@CMS@L55g0~ksPekw}6xUe!N0M(u>CLn?sGHK)Wmz>>x-^u=~s%=*ja7
zP4wP0y{xe?j@iwdX&~Ms*A(a3uIUdO-J6V}1}jgw119S>zx`?Ua#qVXt$>4RLQalL
z9?LgDuB1U;EG??HDjPwT;ySdehGwqML5-HQRWN@x3S&Aa+Z)h8ke2?~T0<L$;YL#g
z-X_F;JPX?k<Te{kAUCFs{sX8kW#RewsA5JnkwhY>_&mH3n`OOWR6qO=paIUkrpt6w
zwi4Lgv*M2C9}1@C3e(tR{lNBp{d>(_G`7QVx>1sKK+2O*L-w~9*pB;Zbi0YA!E>Z-
z+U?SdOmo|Zd~jT5RW&WYMk2x15~ijWq9U2xy$#J!kNRpOh!`ff@@sqIBTh7Pk7e$q
zW5E!`nYlSFcRPJe!|S*~!8`|W=oICN)!s0JqqVA3m}_5gxl2Q*3i}C3z_7HZoL(V9
znk9y9MxXv7!wX535Tr%l*0BkvkngjNE9F$s?2h0#V7*v)<|Qi7#hxh}p;gf1^D4c>
zv=`R?%>=tJlKNphfb>Uvs#dh;Hk=j($5nEvs_0>TP+AU*Qg3b>bSO-H{mz}6Q&I`F
zlf0P2^zwMlX-qB9KB4GITf{pyC0<vORjIy><0+>J11leem2?BoHNmP4`seyem#t$I
zU$IC+=<S${x$|35GJh4tWorz#u{tcQOQ;vD!IS=Xpnr&oiHTxQ|8>o)BC_bppI!%*
zbbkQdR)MHDY1^dQr0*$=e)+zA82(Ic_wwJWjP=4TXGN&|Hw(HzU;kf@9pm2q-eFvJ
z#r7PxM55~$<a@u8bql<3`Crg;45k-%rVIz5yDqbv<HA3Hn1Vo*T$;VuhJAN|jZtWX
z#s_PMuMupVgq>-Y-yAO~j}#sfnXg_Ycm9A?9rNEIeBoTTO*kR%fVJamEo5h(y(pIO
zx<7DoS2wekUN=cBx06^HqYFDNzX@)geks)(2sx%ZWLlfgiGBK`u!pCpuw9H$86{sL
z0$Vx=WBz^hKAV2bbFz*g8d--TX5;f@WhC0do6TfSN2Xj`FHwM)-ifA+!Vqsc-a4La
zVM5$0Yx%JZCbC)II)XIt2jW!PX(jv@$0uqXsB)ASc&)X#z(6hytG6}V%-0;ABR=)j
z3Mq?|;MMZ@r%_jYS|TgtjqmH*k7(lk&#W$2KtGsTTMH_3YIZHF(}j4^RihFdm)jOD
z*kyAyRL{kB98#GbC)kGKwyl5P^*zdfATH*p3g*lAD!he)y1liZy$|fD0GLAd47+NK
z=b|0YA8XQW>s`R>^Iu=ClPsN*qzkUm^@j3D3q`54Wi_)@48bKgO{^_s>6H_Laj$}h
zH>^m83X074Ql5}SaPz8)Q77mncckeEW79p=X-A(fYihoF1{)-mn3_WGP9LTXKMxKE
z$oCm|jj|BfIn7CfR~eq>c1Iq48T=SUlU$1|dHEJ3!4UdY$}Kjea8a>TCybXI0cV@W
z$H%}v<OHc>F||VFZtxVL=e>zNG4M@vR1(km5Rwl&5aRqnyllre8^7{e5qrqIf7@I3
z^`om2h&uhL@|E98?boCanb||$qG1!RV1);DCb8d$XC(4CK|`L=zS5g?95{Ut3h^9h
z+{DRl#z<jJ1nQilQUZU!e{w%U;025-30Jk`eo7<4{;kCFPNoyq#*{mS#2z{4fO)F%
zD_e6yR(9{eFThLeS|$u7SG;=QdLnbh9V~AxGFS#fDk#vqmhNLYD=^~ecdX1T+wZao
z39CRdkc?X;TuqWqD^%L868d$%QQw|dUZypiCFV^DWX?&2pJZ(v*!j#&J$P;$iH%^i
ztX7&dmg0g~w>I**pyvFUOrsYt*kSe1RH<jGtxWpb)}y%)T<QtdTdyW(t_Dr=;_ud)
z$X@8G)cChC`MA67$njPqlOM<_7IhyfhJ=ccAPlo#E+f-*`Bh|}1jVD&g|<?X0`=n-
zbw>*OlCUaj7$Br=c(i{2^ZGoc?4J^y-BK*1E<{_Zgno&^(y2|CAr#U}itxm^_2iX<
z!{5!c{sq>~^^wNm=qbIw_CbuUQEW}_cWitu{~1%j|KFTDhCe!mZFEdXA7vlOj>qJt
zhCu*!2VDZh!geWb-EuyU^J(Kr|8Uu;;eXf_AtB3&jkTRaGTF8Bn>25`gnlf1wxZdz
ztgIaD9~r8cB}Y!H@Q|Gnq%k#Mly?Ifc61&#oLY3E<H3^g2*rDy-3r`&WWAi7c1?dd
z#HZ4#>)pz?>Q}<>ZFHtcwjcb;<8O`+wEKKzNhhdcD_(xUw1Yrfj$o|`5u9J8w{WT!
zji-p`pysEJS5@yk!CwQN;E%mN*9)yfdq+`WV0kZid!HX+$y4|WE83qrm2C7lOXw(f
z6!WK^B8~d5y>RuOkAxJb&if&N6QA5_5xjh$eqEVxrk9$o@Jz46anpgPGy-2-?Wv@O
zGa!YnjpR6n%-x(nlj!(Vp(4-gD}a_o^8O4h4Yim^dK{OiZ!E8i!UTUtNG&MF`zP=5
zaMvVH6qe4?+}NBXndW`(T$;lZeuc+sn>$%fB#a?qI3X{?8L;RD=Hr(M<G3POnZZ*i
z0XvyxrzhD{$bduSA&MbjQzj7;?)-1l>@}Wa&tm}tkJOn!{e;6oG9Yl0k>*ry4$+2!
z$y0uZdJGt1jdd^YDRl!#d*L$ne0q<lNf+atAcikPb6k`3JYUCl#c)`m7{3lJyA!n-
zA+^x^9AMf&rcdf1h*VHY&FnjV#|7;9{Imkqb|O!UVMCsxeZzjbikLZp46bRqHeRo1
zcgnV6DE;BKP8{q7(G~lNf6JC264@ju*+;e3jH%0%niF5IZ*Kt)_pyCzZBhcZFE1Cb
zD#N6r9JQ1Cm})s{=Rzn@m{TH}u$nZwAb-qi=TANVrd%31F{|mz*~6>DL(NOf-9SoN
z65v(<Y^YB|NYwdYvY6>sc}!0-703d2c@Wn;Ep3<;bf)zNZb=RaVd@}tNP_U3)H&L|
z#=Q=~FR`mIy7)Lz`{U*IC-a`tNpO4_h^ffzC3Tl>k&;mev3}MJVbWZ%p<1LDBHO6x
z6@CJ&eB;^YK3luRY2b1RZyK%NHA&r!40XLL-uw#kUVJaZBygH{SCJ_m)=CHF@#MdM
zx4RnU)v}sQQ?B98b5&vtZ{#+bnGjouCxlz9IatE!1KxqGk%1tR@YIO|yu!c&rRs)Q
zbaZ!dd=z^<wWNnnW5cl82e+}75+yONX|ALc{E@P`5W2p|Fy}#n-NC97O#nwjhlQf}
z0iRBMJ$(v;$B(k=IE-|yYt6DL>-j1gq->O22Lj2U6ntsrd#f5-u%I)FMe<W2D0{Wb
zSs<=FbFH!mVQoBlP6eWFBx&eMWKj}iZXQ=Nza$2}>s4`1sRct2H573cDL_F&osHiY
zqH9Ffo@yq^?=xS&jevyE5YH;}B@}BP5*-dYC`Os;&Sx4Tp&Rwu4ZMR$upfZLB5-IV
zRdY_1R5<PcH+MM*YezSBwYhpG$rH=_l`}66IEGn2Hv8L>>I=jNG1J@~=*s7+H*Qwe
z3m+`YAqP##*`GCg&9$sHYM%1Ek|RH$ms`0QVngxNJ4Q1u3zMk2r>dQKtgW2++DgAM
zL`>=@zefHD<dTx-jf!ewV{c%-bC`~(e{#qtm&esoqw{&M*RvFRtr=KtlP+!m`vd6E
zemv0ZUHhPAiOLINc?~X;7v{=@S`tZSXQD50;(rFV(b4lv6S}%4#{f@~k_Nb1Z2thr
z6EBfLmX%JvBM<EC2jIIt;|%6Z=HDFEZO0w=S@e9Pm3VCg0!J;+KG>>8*Wfz;H*6}R
zd!X3;6><+26UDljxS2!*c8+WlL*1g5lTL2Cq%OU7m;M)Yn<#H1<duQXry3J`IQ5eZ
zoRmx3Z`1O|ZJsGnKZ@dsiXT1~D14^7s6*R8B5F(+3?}L&hJ)DSZK@w<a~+?C!=IPT
zoriYnM3brOtb@HO1a+5V4LL<_@z^l%J3x5Y)V6qCUmY@kan1+`Vaf;gb9Zk6H{H1X
zBo3$b9kj>L4!o?F-n+bNTtAlc%Jg5ADg@;S@(BPl85yY((Y)L>WUdN_tPX|!7VEBB
z4lDX#@=WqI3E10yf*((FFv~*fzfxs-0);i`WB|;I{^Fx}t7vdL(L5(7<P&`XL!w}|
zg<@z@t~!KDiV9AGVsD5mFoW(QRTq;!PNExyauVo^F+yh#Nik;cg(td6#&rX5$KtXA
z8>XxE7!op*k_^T`zb^#c?2qkVz<(TIf;c&f1*?GMv(xSKl}1lt65y|jI&Vh>>KqI+
zYulBb^ml&f2~B*u$T-o*zWgA6e%u#0NnelIh_4yODjll+ik(ogdHg@s)FZr#!lk(m
z=AO*#QyN@o0s3<9xm$&SLtf8rCcky64XLVvjSDMQ^>jjR1atQ~50ljI9B5FgGbowX
zy1OnvCzjauDOdFR53zt=B#)dB6y9U<iD0oI+dIHZJmn2~;8?9#!-lqNg^9V^>50Br
z>N0FleWObIqDsdVXx-_ZtQy=XNCR8hu4QY<yfc#=g+^1xl$Q9jsiz^u#zr5OjE;GC
zW-HU7;OgXS3f)zJg6{hW?6nT1K2y5)Is(NFuM`A4g)C*0O0Zod1;t@7SUlyy=)s1m
zcbHSVaghSWrjk^rj`PR{i;L!7o9J{}X+79Np9JWCiYA4wNzy;@mlmxtV3Tf(@|9pE
zx!Ze<oH=lPl51KwG;L(DmT#c-q%A)MQOiwWzj=lxUwALNn-+;9A;@4I!M>;%=`Os8
z)avStmT^9EZ`URgR@YY%fyBS`KTAOTwwwATf*LhOlqg!U@om)IeBx(kqC9f|nIpzZ
zw7fmP`$gJuMiv2QHIQ~p2#!l)rm+-snq5LJJSl<TWK?GRiC@^es^K4l-<@ho?Oz>@
z^b`bGh|J~pIH?g5Fw}W7sYy$GiGTZ;Y`fUMqUz3*-YPPh0|&v&rc`2(=S<bo#GX{%
z6rNS06qTUxUEj9SHd1t}S$cHLD#{p)CQ|?%0aXVr(wk!a(ANh|dv+TQh^GtzRRK5N
z4#pcz5;jRDX1Yts+VOgMe0;A70hr_Cs`_zV$j4-k7lY1H@j5BURz~U`Jx5=UdxHRy
zasjFMv0AZ?wA+t_K9&v~Fw%HgVAC2WA+`I=2R7+$1TbQ&*{G5%9kSh%LnYEIS?&-a
za#72!5yUUWS$p_1u35*Nc}QSu)@}c`Ng=vk62smds0M*lptoi#e{mrT1r2M}o}!%A
zQ-D4k7qUab&NuH+Vh!4T#QYAJ20vw*zVQOzxy3AhQZd5eYQ!t{vPT=Eea30R<2}ta
zM#|)15druUhw-}c28F`1d{q-a`tfSHzanlG`sHX+42<oGUyyHh&O^Y8?P+e6ZIAzY
zR>M^J^c?}Pp)vP2p|vv}qW5oocZrn^Mwgo3%*++;rc6)JH=X3%sjAJvuj5*4|Jjmy
z#1s|%+b;5V9EEB~9TBn2j^&qQWHkI!r9<_82yd!i{s8%`1$d}=sI_Ntt-irBU^M^D
z(Gqk#e4piC|6i!I8AF4XFSo=H;ooF&zgciswssl8<Ljih5@M8*EVuIq+1e^TN5mMV
zh`A=rgF*hx2C7Z-HRo>$n_Y5erL4TmpK0FC@cp5vv8jARXhOnSF4janaSlc~0r(|(
z-UOZUKwv`km=lt6U6ewq)l9KztKEQ+V-VaQ1#4;rB0?J|W)fhT{1zQNvV=^zw<Klb
zUx8a1UsBr1glKn)h1IHN(h%k-3zVP``#s_r+;TRt1W!W0cekM5dA->Vj*m|(%np6?
zk@1pOJswUPzcpsS#q>={X0Uf5-t3dYIl`wTDQhF2UmK?(J3J|uv;=@xuWW}^kuo(x
za%kdO)1;F6w$BUctdWo6Xv`!}s-`VI=dRgnJ1#pBpUG=}mFQ$Vo@Or(q%CcO2Kzg+
z_Axz=&*=gNpnM5u?o|nu9OM^V<#&qv+P0sEA*i3m)z0#zEJRb+<4lp!%)KdTKYU=L
zjq{Jkid*`!7t`yT=T+a;|F~t>GzT|F21hU5yOyJE1^aUzZ>+n&4<pH`*e5lMh2QE)
zX&QPKC5R2~aSg%Geq|AYM%!hV+d?b3AZNF-QLDlnLx8iP$j=11q}_n~XeKqc!%^a#
z-hrWc-Bg`KrB7fU$F!)lw4&(9eewu437IVa?Ly+Q*GVdZX~!w2*~HIQGmWD=dS7Jv
z7jaj-@a&6gIV_sfqNYz(&KRZ~mU2O~%iRI#<2BJcHhXPSR?Rwnbdsf4jQ$sd*33vc
z=-h->()tVPthMWd(r<Q`aRn{Jw%J4md2<(19U`1svCIBIV*wFE0M22_cMg)=_f4Zd
zBP0#+ULET~f*I7ClPOj&^plWPl~g$|Xo0lLFCX#OW!-wqH`sj7kkoto15!XOF_*N8
zAY$494V%c!UC`S#=y*s>gFxm?b+ywX%{a9aJL9j-0&Qu&CS>}q6Wz@{`|M`G;MB`8
zH^K|L`J$9XBkP<FMC#JLv+kuMJe&&_^IX7XP#v9<|6S`e{Imm8o`XmDb+NGSYb6rJ
zjPDBx{>)=pM%slv_W9?vRgouqF06Lx214uy;pH`%j72kn>BOrt%}**7Qwh^O#551A
zP_}lG!z8sj>^NEmX|?#$C7Q58zEK4HMFdC4l5oXK`ydw`2Y%WMhE4I)8*|W!tW48U
zy@P1CNp8RO#|wHtB9M9byScKp5*<64CWbbyd==bELTELO5;45ewgs$Xe%<uGDP8hC
z{otbn%a}=<UDynqUO6RPLZ`0i4dD;t5m%t)JT=}u0Vd*fV!)G?e`*_abm=H<=n|ma
zh|#P#7I*HQijSOzW{R)vXshD6VSDeuzt1)CXYyrW*iy1b;D%*ke%xnCHncV5-wvhx
z8Op+oBL_lslzgr4wy2tKNm2~a{m!~W(KR-ysYDgYtlUQ(#R&QMin|@J$kahRz35Gy
zXf%!j2pT+Nzp^wR>?t=#l9VdgbdW0V&k=3BfT=LgE$L&ohu$+506Cr7^jmLr4D`qp
znZz-9WS)}4BPra~Y;=z3#)!c#T)#v+AC*OdA^B~H?1sIAQDlK=A!RbXuixskv<-&~
z!T|MSy!yh{B6Rl;eAn{){5Xq*ys&uE`wK3SpVk~f$KsT*j3f)Io>vsF^prCEu`(~W
z>$WP|DK4Uj-GYmnre8+Il?dLHe%7(yNTDYdN0L~4#m6d)JdAr%>u&1GTjNXA_N`V&
zu*wxlvbIm3*fA@9Y~S=OS_!#blD6-4NT|K|J@&Vq@p&|T<}~lp>zbm>1Rzgvfc^m<
zo|^Z$#Ufj<L8A&F$0nfW>l!RVe;0Zkmm7}cw<qCP!+A45p}SLcpr?A1$+r+iobyV{
zd+D?8g;EIvi~57cr|rt*mH_<8fAW%^xyxSP6HUC~@IvX{&<t=dz8}G^EOhI>qwwyZ
z+@-#ren}sNvEbELh>V2HF~~M>CF~X`YLI94uRNv$rse-Ha+kP{2w+3<Am7zNF(PbL
zbQz%&IOj-$>S4uRTQGUVzBY1cFWF7YAN9tJs&GDG-knUITax+fwt|ep*b5?uO)XAc
zG>rxyAIQTUw5vX+)7lsH+7qo4=|q0wFrZP>&7W?{2G(B9Dyo-Jt)$qV18!4H6r=d{
z9~rSJvHN1Yy>%Onx0|*eG&{EnJd}WOTqVa3mKDA`rfC#R=_^}R8_X>|JTA;9iq;Vn
zi7kYUf59tBQYCqhD>WQa`dCX5G|MPh!*f{-OFpr!TxVe}?%imC=0Xi~)ercls;U|3
zW)n#R^vSindL0JcW=#~6DS(~c1%S|6GXI>-kQ_T1G4Rmzj+Fmrl6RN%4jE*s@r*6c
z#uY!w5FBtM>GZ4=O@apt3>~!5)Y{Z|odZ+aO7?tvu_Prjr1mmVh1Eqg`vVc*UTe;h
zQ=|u)YX1n|WgvJLyqZ+k3&czkj>k;?Rq-n?;3=uYQ-AIcO}+zbQs35UZ|>vTb8l7e
zGtsTD?G44h4fa0YD!K{mRl{Bv=G%sEp3-~q#K2(!{LEi_-ToGAwPcF($jLa0*e-Zb
zkA}tHn=M*1V?ujmcN3Yo31Q#C5WF@v`K$L$W1os!2k-;jsnoQ(ri_jWjxQZob57q>
z8~Wo_&gL3A&lH?k(*1-s4em)Cb!?Ykfc($=uN72AYBSduKVZc~))J7QTy<f5Qug|#
zW+baf<}8nRmW(&B+@Q>t_5?qu{B`!YwhhM`_-aZ(_6<L4)&@I$SW!L9-C@(GcIn}3
zLvF@<Cb@zu$_SV5-}!#M0G^w5--JIMeP99&7+lV+*dlT=Is91kkkiilu3aPnf=Gsb
zg_oK+OqeGxqialPM8;3Y&r?!<30Iuwnf?JJ8&Y#24T|zMU;97*i3n$RovB_t-C@`u
zRxE?AvT091cNcVxLnEP77<k1yarXhy%SeIe8ItCY#iL*XD4gAhpv=`gYMT5P1ZAml
zfhRw9CbUe9S8NLVD}Ty|M)fsRopu_k)!4qO7>Q@*)Z;~ot~o1%)a5S-M$BawpO@{P
zovy3<Ir={r;3uwHqB-pa-V6-m^^{T;@^7$z{<8151ohDON4}z@n25{Mvc|I~;BfkV
z61(^G1hZNe$vL2T6CVEY+QA_~9AyIPJtFfws(4w5(-UVuzDwMcS>!)JktCO?z87y3
z!8cV)fjBRzH9DPxy=bOJx|pQxQZ^T9X=vzuAV(%Hj^;*o)nwE5mk4me_-y)75Ov6@
z%&AVb($Mr?-Oue+Vr_jRdm+yOkLJqO37uGT_XaPGBGuq-Z?@(k9&q&!g?*6B7FeSW
zh1=94`L3$2L_lq$pL?l{dueTbS~2Iddo%A$QfeT*_h|dZB#OusM)ywWYp00?RK>~z
zniy=b;4b>BNUOd9<#gcjg(82BkjdJtWd4m`#{u0G)AG;`;Z&J={Hr*W=a>Esbti5@
z&;j^wrbg$JGhdeEQ++;>Ny5q~I#d;5R=E0wwweQ}Ia43BmdYuI#bU$TMmp=U8j(;F
z6KF)l+(4aBK^3dO-DA%C@@|f}jLS(Jvf8eZjo@U+GAW~iWiMEMw;L$xsd!?<@01ap
zg}E6#CD-}t!c6^8kNs>fuBdPRiHD+64PV9~?B=IrO~EQR{mY~GeR6qJ3y(9YX<E5Z
zzOu_fj>qQWGYq=V4Dq*NzR4w17GwLspI=D($ah@=V6QlrEar9mrvh-_i#hzF=XEy+
ze|GssGHVr!!&!9>XZ*nGZQEeSIS54TgRYn~N36qdOWv<Y`i=V^F3$d%Hr&&yE9zv+
z0CAHT<?Zt~K!v<_@Ld}sR5U{5h9vf~{7;sGFD#b8U(6Vs%#}GM*-ZPuaRhyElFRTj
zC-~Vc!@Wl%3}fcK$Aqz>hk#a*1WnRc%A_31{Yj~BDnGibN%x@W*m=8QRQKP~<nRkY
zO((Z{`FS|UmP1s$BI(yXy$U7Bc1cQSz36$61!*+cU5;&BD2Ms8$Zg<n2u&(k&T6#w
zV(C7ApWnpuC^1s$YR+!s5O=IfRk^XNAJ^i=q{&7XG-dM~P3&VT(`ofH*8dJc2#*7n
zlZJ1*@Gd>=N?%u9|38gL{uQTSNc9jZ78;K^ykmc=D9_(#|1KmaZTZe<{9&oIf+L*S
z)eZ~J6W4KiX}?2<@0~fX%^~>6iW9`n8VdZ(R(b0*?_X6%2rU-Mwz9s(7?u^y*_XMA
zvm3Cq`DAF<?O;NR)Fh&6fLmd{7ydOr@%3WAxAkvt$Q2brjD{XFo%xOkC3&<Eco>)_
zJAn-R3~>iCS?GZaiZ^H_b-6i`-h=iE=46b>kN2%P1u4{ZX~8Wz4-dmjU&sq**$GhI
zu~yj4koPSW<B>lIu669Mg-iX2yCN_yS9+b22X!wf(dQ=zz={YH8)NYZ>@t#KZiD((
zy9p=b@NR)9vM~*{doO>~9l}vUovXt^1qAJbhGyOtLg7RaUU+!36V)2&5gYCcmpr)!
z8Fd>IG;0;2KcgY&I`nI!0mIZJh^mfW*r{DBB`5ypaR&tv{_+uJYl&L_0eegD(*SK;
zwKo`ubHa-!ViGFe9_nuJ1~{S=o2<IRyWXI5oM#r19GiUL-TvD@-rCcc*rw2RnzP=8
z&UcIdO!21OVbbpORZ?_p;IK4<Z4wn6Q%8GZqPkDZ(fF*9=N;}ZhiGU5hMrPIQ@&hg
z{<r%k?4~`>>q%oBE)xgyMv&X2ErX)Y!`2&@>7AHBYZ2Y@0q#OCrOB<*i&_S;6K`$P
zy2iDLoGKaZ)QP~ypun-%@z7qH2jJjY>MykWxJ0eT6U4&h>e&00ngb_|*wxU$+{eN>
z&Z4uYfsMP<@LzuD-3P}lMQX9c%>x*_9kdEa;lG}JZ=EikyX#XmzJT>j*-2;E3vu_w
zqGCSrO`^UTa|XC@NEhoHzqg$>UgT)<>8_6I`L^9wRq??a%|8G<tU10`S)l2;S|%b#
z;%wvUlF2m}WF-Ml`OeL5H;w2QY-iFMuofb+EK!)`RQfmt2O@aSc88u_l^*rty>YJ0
zeaY+bgWS62sDygAm^mAGo|d$c&BA;9M53Ux|7^9DkNvII;-Pinv1@>olxMU9r}0GE
z!gVt9ySfv`d~oPT_B^cO;d;;C5u50Sy}1@EpD?7#ns%Og>Jyw<W|}uRS`BrL))GgA
zN0TL$Yedlo$N_S%AYM7%8rH|NN=kP2^0?#D3d0JMY;ELX3Q_q;PAhZpcINvJ?V3`*
zLJfFNu;*=unB9;#qP5JGr1twe$XbU+@0n*0QG0delKh71{>t+C{+CG8C_{d)!H<_J
z5<?zBaZ*#2&pHIF)@@(;&{+Ygn`Zz=<K-WpwnH~XXX4<+A)^V>UT*pgEX9+F0;l#n
zIu1rIlREYE=fMkmAw$tAMW&CWAYq}E$<3xf!+sby*G9*dRt?qp=7-KN5RFfS;J6o7
zm!$DJ^bF5=MR5=MGP(_Ty9tOhE2O|ujebXxSX-l<xQiKJ)c0a_6FWsJMgduct2R9D
ziVVX^u1cApKrO#i<s#VGPwD(j%Gop09Nk^|RIBqtb;bw-aVr?OtRwV*&`lzmna_*O
zlmPU1x28T8>j?hdkTj%HAs@r0CX^uZcodtT30F?2OlV&4r51iSN|CDZ-3Ct}ts64#
zSmESk90)hb=;9rBx=o6I^3EGU-3Kw_0e8@{%Y>6puTA>)%$|CyFU`I;C35VFyps(+
z$`t7?uKI-Y$b9L`L%i$n^RgD}nh73ctx1M3N`<Y410^7yHOp#SU%fTPsrFHHFglS_
zc-I+%WZk#^!bfOaC6^P2qNi_isG32l6{?*9$bwU?lN)=OC&Fg_ZatcOwd6jQzgvr{
zvAq^XNQqZNlFEnK$zLtrWj~A%5la=JWge3OZ^jGj0qQbTUOB%Rc#Egb=Q6ubCwa&F
zjkO6Cm~XY|3rL`4+?FbO`f0Mxt7wrG<|`Ivl*39!Mj(xyZ)Na%Pyt%#3#rg^yDY{C
zxQBRg*mduxGWP|(0~<=)g-yk?@YqW#3KH38%hxq2CBiSgYn4i>X?vF9Uw8?<b=JLF
zst-zsald3P)pA=VTP77*9)ZBcb8~*z?a`;Zndy(P%=C!cye@Np;rstCuyIWABwGA$
zhD-bT9p_{_)(y<T4)5ZhU!IEBNIN*!t|EQ*_a<Ba7jzWUwf&RuWzB<=+iSGC!Hqub
z8g*eX`H1<t)`!Qj?(qqi>{QNBr4vT>9y&UrTFi}$J$H9L*uR3iJLtf7!WpG!#aSDk
zR>1HHJNbmnpK%R~$@8uP!fKc+M6D9M2i}*72A$wF?C|>^ua>LwCu%E}JPQCRy05C;
z{5bmEw~p4P-Jtv7g;^Gro3Bs8D_a3g)vr}re;6VCBvEY&qF_sLP^l-IeMBt%JSjPI
zYc_7_B;FZzTq4v9A*+OPfBoCV)abvOR3jeFCWK=fq~>C)Jr;YhS8n_~VbYy9{x$Km
zsr~v-`&!oUVyYwzJoY}kdYN>M&UoUvxB6e#B%t*ohT~A3ii%UwDf?ZI^9#XxKyowt
zRG)U&wP@mJ0v-M93bb`NwLrKyBq6ckeE4~VTB({?U#7<$^YqM@H3J*-=9Rm)(p^KH
zc;egg;)Cwk6w4Z9157l5dIF2x^xGGmXSlOY-yZTYKZuv+>CM7SlUCQ62Ippl7-4az
zK{70^<6UJ7UZVPT?|^&ZpLgG&=XB0e0-`pKhx4w$?y)bVk@1Z?Y~^rjqhc>4-3x)j
z>Ntwy7J;kv<`tZbCr8Z|5Myw+Rl<Is{7r`Z#kJ1c%Ph+4zrTYWki6i-kv>HojlLB>
z!{=npBk7;Rl@cyz>%67^Vlj%1@9VGM@Vb#xLcippJ8tD3u_x$f+2yWApXDc5uI}&&
zpBDs@a^KHctW?+m8bGHWe%X%5ATE4%DSykVU);5~U*eTalspz>q6UWhqJr0MDNbKA
z_D=b8yB+k#^caaJpWE^pe6Awv6KI<YjU&~Cv!3f-?kFghw5rG6y-at85=d;Ry*lXF
zRyYK*IzM_8*AyXqeEY73rmeYAt$}f`{vCx@TfCTwOT=uRU^eTUT3!g*q;S+1r<%q_
zWO(L#wZ5);&5t6irchXKBX8mur)2>J!(P~u+~ri@iar<A>PMEivLr-JOovT@-<2c{
ztD265IhwwazdsK(2^N`)y~gNaEH&Q;H$RxV2{v*kmz!n%u<Q>I%F@lP=FIReT%C!l
z3B$O1%kj6>^0ysj^yIC#r^JcSoRW)*wGzcMXq~iT@UA?tG0a`5H9v&B&tm$maHAF_
z42b8VefF8qkhi*2FtVg-#$1avg0pv^5hb&w+ox9t%I_R^{%QRk&wKVx>df25KUP8C
zWQ;F!^LXnk;*x(hm)kB=!j#Fw?n-mBgvU&e8H{DkJ7QK)Z~F2ePiPAMgu|XxD2o-R
z1~Lor&u$oRGXcj4EAW#5{P}4|`n`5V8>%E1{K{1wleOq$B930QE3M*aZx-o*-t9YW
z4}kFMu`jnzKar%TRkVUCRqRE)k=Z6aPRq;%*qCC`+mqJHsXrCTssb?M0J1<agqizP
zcn|6&qE~@?QjxO8m;Bf1XBV5q!3_)gOxLkG-gnYBR%-s9M*D)61hkLjv5K+1@0#-4
z_JJ}%uzkX0d^aQqDr&&Jjyi6^*AX~hPkd@qZBj68Z-7B<L@YQ|3IQCj7piTHO)ABB
zNj`sq9IY#)QS7wDBtLRjOv=k{%@Whvc^BSBqjOZiIY2#49o{T_I%JWvgQh58!hc0z
ztaExQ<D5-Tat0E!&G<M^Qq%9t4V%MvV$Gh&87cZ(1DcQY^ov!EpzX1SFm}Qe;q?;p
ziznNV-^nDdMQGH!)Iwka$JY)C(4hpAyVS)*e($&Ds?T3fn*;^ZMR+e#c%7jIlG-kd
zR1}r(@#OK3fYzfzVE^Lr$wjI&irH;NkMP75B&VU@D~t4fPF45k(%r4qQxKgtKBlhk
zc4jJC{f?>RB2yH*s^b+I!IB-!^fE8n_q))1FR@Aj4dc!H**9||L<U2^A8QU~bTrS8
zF82-%pf|?PKQ}fuRER?%Kr(pf2i9>#Vx!AnZm#J>g%C_wf(V}3{;g{K!A(rHg@$qU
zupg46h;}9ye`;#GPNls+ntteXenPDN-!&4!j*~cw*{(l;(YFtI^t{t>8G`?>Vae;)
z;G&<UuA7^GJ0+g}{Gy)9EF~Y(+Eax<O3L7Zuqv9J<jRfJ76cxGC$ob!dXqitnd&CH
zVhVWW@g7Ai$nH%4$~I-$UfuZv2qJwbdU>b2MqJ+oi4vMK^=Euxwq7mP@X0IcWlbpt
zNRM@yIT~;dtX-)mL)G7lMc3jc8fkGUQq|f1Fek3roTN&SanE<hziRE1$GP4Zdx#Ut
zxmx}M*!7Y=-TvF||5)et_`?Md*dh^QJ4Y(moKmtO%tOHEx2<#P8Xz;untS>C@_c#{
z-p#Lg{_+oifAXciivxVbfxRO;3fqG!ICiU9^122yWw~zs<qIJfLLuGLW;1)&gqGh)
z92eIfxlQqpDr9<~^T0j%Lyc8}KY)**hatNq)?Z|0(H<AZt;}C9gq}<9P+EvYG1ysc
zxh_#p<XtuF0(a~BR|93w>xsLZ=6yKkEJ6g@EX7AZJepH7cg8GZyy%tDpOz7eOvd%G
zje4)Hcp1a|t;Q#H_2N2v66bmNdkyE5k5%vZ+8%OY{U6l~D050b*wk>u^Td#dZJpEc
zQTGq^3~4;cEU}0|I;cY+&(wGEWm=;&OPtKFYivtoLrZ#10_v0#B1dl<05=6eW7!>l
zE8@-ddPd3uy+=5$Tule;$*48=u6v>(JJ-+K(M~?ZsebBU;RjOUXONs2f237>0*iB&
z_rYtEn)Qk87S5(TRh?54GLeE+oh)m$IWHA|KjoPzQ@0Y%3GGH+1MYN<ay9R4Dr6p*
zmk@`KH58IJzgTEcZ{%Io6)&k&3Q5XzqMTQ9@Q~aG8|DkgHDc9xUragXQ{NR2m!ie!
z%lNHfEuQTb$=l<V51*g1WaB-Qf*)OGbc2tmml}98WlLYECu_%$Uv5g0NoY9wDF>&*
zgb%~o`!vni+zMyD?w#<YGKsYPY#fBJB0t)H7(SLCG=Ib=$$^nTYBoeGU)ldf(YqwW
zxo#jqwcka2oC65-A`p7(eaVA3myvod%~`uag{b;!^{621PQCa?;LH@X`_S`mb4U0y
zktB}?{ns_wWNr@hhOjwGGB^<r0r_!T<KAKVXl*=C0L5qTdV7Ps*RG|*4f9$Z?&%sa
zCgIrF2OLbZw0pk_wV10ez-f2C6b$lGI%b*mo$B^x=q^-GaRX#5vfE{m+4&xE(_`gc
zy%Y<vBMvi}N{adZ-s32k3MamjcHefWyZ@QAr*lvBoBSaqbcS{j-xDaIr9@p)jqmzV
zXPc?f$)f?T@s)dpkB=i7y&(v71vgkDF+7Tbgn&>9!Q8fJyuj#GFIZeY)O4`d7J^|#
zDwC=Xnb8gqsf+$T66K6)${*Cw<KWM(a=8@p6Dt;PId59)=_Xs_WuF}zqUtL!O+xY+
zsE(oXdJaeqOgJO}9e)5VJB5pIpZUk-j0OkB3&0w|dFZ;=dY)-BG<lEXmI`B3hwS&p
zJJ;V|UFZb|LCSCxkwk;K8wu5AFhWr2>y}=*09AF0&QskgJw1E3TbMzB!#A#^GxBny
zr8DBbgvgZGc&zrq>LHg<My+DQH)#@{x9=LtB9(ie<$OF7E1_Ex<I^G(bI17-1mzw7
zqOmVW5m_Jhk)2J+o||^s2frbe@)Lz)<pgi_@^@qlTKCJzpva>QPHi04<X{XJrYO{<
z!vW1e<ih*R+trHRwZ~BDoQAKbNyQ7YaF#-<9ND<sz;@^~uqDTLxD02O0v;0bC9zrV
zM6A{~zNhIisilz9v`T}y{`emN8>K!uN!4nc_i~<9Y_NJiUN^C!$!pJgXJcc&l>5^6
zldJ*>pM;Br%8?1K@-NLhustUycYU7XSN@g=vQhf<U0bZjRiY<D3{KMdX{_Vkxq{O*
zX8`j}`t?DzZt`Xr5wE_b%R67^O<U%ldzvrb=kGiEp`UCb%%Io;%Tju|vIC^u>U*cQ
zYTl$|@0u^^k;X3=>8o@x+=vB`Qyu$DbHT<zx?<HACOP&&zt;PfOK+;##(dxO77x1B
z@9%s*3noZdXt?Vo{dlEaFk3kz&AQs<rB>o%LhU(Z{GH^5hy-4b?`DveQ*=!|>vwm3
zUCt4R@FBV4qv*ZGRuGd^Ku{oG;kEKBR~I&$;Rv_BhM4lIboZTTA(SH^j~zQMPd={o
zX8K|H&{yf$_2!$|9{|4NaF1VOU5^<wit^{{lwbCb+&=u2^r$|yW3KH8;_R1qmWdbE
zs`IYV{J8T<=VobV+QckG-@P1vz#Xnv=zj}Aqd7@<-ZIUA;UUvZuKvvF*A2U+1^Th;
z<3;Esxc@;HF+5L#qb#DF9gC?uyP(Kf_zmip$-wt|71J1^?y_T(Pb<WAr4)9=IR-&K
zEE2~zh0*1*v?nCl@c*ln^9*Y8>(+kgO?pufkSZh?6a=IQQltfGX>=46fzTv0K|ll|
zy(iSrBq4+@z3N}2BM=}I2_U^=0YSxfIA`X4p7+B$bDo*^{&dgWU-qood#!t|eO*5(
z!$R$@tgLYtyG!5mz2}#p3pyRX7iNw-+1Dzj2?TO(MHCjSD;KI^KdIE@d$2#1havYn
z_?v@?KuN>qH8h)89Ak2ti!^k6<|;n8>Gg;9a}mn9BWS<hKLzlb&p<hE_t1G7nZ0e;
zifWzr5?C;&EXcyxAewcj>cHF_otp5!7G-2u^}jR=$S^vW4Kw#7+Or&4a7%lZwDI+o
zWjDu}h(|2VrtHQ)?d{HnHg2Ok7JTr7R>y^7z84Vw;;@mmu%~Ng9a9iGQ=k<iZ!~x0
zOPefZ?sGTmYsW;}%&aOUPh2Iw4qA2&-1;tS@Vf@EdDwt~b_3x$e3W7{1V_fV2d^5Z
z!TpJ;E}+j$OAzB$v)9Y}okc2MX>o#5EgO^T!Y}g!USwj#Qp?0>NWRD$Tm`>Ceee4?
z2I8xZSL&Br<tsm4F-Goi$$Z1SG8vCCu=^CB5+T#o;u+%bbonbs-ja9BTvX1uqVBDU
z0IpwT3fF5e@Nk@k*0fX9Y_V5$n_#G`Z3=tlzJ9D$yyIEt=J2`3Oh3y9mKEPLn9}dP
z#-zu_;D3^p89~_bME9RwJ1lD@4~q^*;7rk#e*t0PA0h4DL}yAY@V*6@?s@IZ(JvXL
zf5e*QAwNo5o;Et<*oJ*Uj;~0@8;5^4*ovN;S)p98896TeV}*?S3rK(@pWRaaYJDBq
zd{^*s4*LX-et~H0+Zc-ER?IRsD`j$LSroHAEihor48!wWpB45R`O+%3wwO)3$}Hxj
z5nDQ0NMplp3a0)FUbAtA=~&+2lu+v36K}V#p1tRj1$T@K8TRiL>oK!sxcEqWR2o3n
z+9<-H`)B~Onq(h&(nXS5edR94O%+H(KTqA1D*0_*&nEnBc-PWbWq2}YC@%S0TjPD%
zZ`7$9g4#e-m7un^?^gP)+^$RU>ml?G_6(f<JKcZo2Iq<$gOt+-S8oNkD%O7zpKcu4
zrP!qCr|N`2Roj}FCI0Ze{0kUWLv`I7y7Z;Y0pg}9sQ>%`L=+}leMq&9$bn(iqwzo)
z@qMrJXEskg=7WaGvS*vn>u3K)DJWlQ$|tkRD^<Zb<TKw1;*H@9Yp*9q*%)po&XU;D
z7hx$IX6hPjEFugFXB(6Ntno)3p>y}f9IEwUAY_DL+g;J<Ckb;okpXLE+$(arYD|@%
z!cetxTONN7JR&d5A;-DENS_gV?t<HTOKVA5g%LfPZL6=bWzY8in=-@c{(9xI^iGgh
z>XFUa2^c~~h5pcyxbaVwp!BN`v2}&m;)4mWNFKeIQKQxKFG2Iz-fp=WM#)}fnwF8)
zwd~ACVe7?75JjO!CuE!DX8c!YdFQY7zv=lXRAX0S3%ZAk8r^?TR_|uFLo55kK|vvB
zxh0`q${Xetr9UhNaX`D!V?~1`4!bWvD+>Fk6*{9&a<hnE!_{t)>3<d-fAFY|9J-%1
zM74^pum8}>eySoizNMYsH2b-6{%dMoS5mZbmTGBh<Ab{5u_X#8`SU7VQj0wtDcct+
z%hhn?;$d&;0kzcE(RR5CN)<O*w&C$uFj}dOBlA3bRg#@=O%u|+m~>ye2CQ++Wn5YM
z{#ngVb>A=Ptd+($r(o4SnUO3<gX)r#1D_myst$PP(t!s{F>CEf-A56M?@9i-e5}=4
z4PjkvTUJr#3H@Y=4)}qEbd%sOX@L(u5PaF8k=Hjb^B-3oCJ!R%>{UV!$Yj&l2b%NG
z1)`4<pLmM(23#|4v@1?vXZ^#Avi?#5iyIeyS#m=t>idSWUSmNE@Ds2SHwErc36eM+
zEnUc2xI}Tes-Pp`ab!Zv8|gV(`?=kKQZY%M`wV?MHZ9dI*{FDLp*z9H<2ZAYN7>TC
z^||IYqdQI;d{g`|qp>aFcEVtJ*K;9zk9+*0vLpxbo+0n2O+jsvrmo&<mkomy2O^p}
zrV_M;aOa}W1ojUvDbQHrThB8Sse8u-t^^M*_Gkx^by4aU-HVQIlhR){$r|4F*{cr{
zxppYojamF>I2_3BSFCGS-o>^zVlpDPx{+R-!lHZg_{>H1zsqs$b{qaB!nNCGtG>U{
z?7JXfp)vQZ4=*%rUun&RY0=E{sb)Mg%>@C!rw7$8)#|%#G_`=-Osq}i%T1u@o2+__
zPmImsvpUbK&^yq|z+rrx=XJ(8H+Y`&=4XA1;IYnfb9kHfTiu{Xn{!DIZJWI8h1pRB
zyytS1juC4LkAA{m5HER7J{_s;Jq0hnovuj!R6+9<9+U{{i&lG8oa8GMIZLvcVB|g&
z^xGxhIy_!et1~U&+(xg!mr{SCJZEN2GoelL_hLRq#EJoG4%IO>xVM5I&P!uX7sqzR
zgEr2|54bc9+`Dw*pm1_@o#Y!Que=?oDasC37DVJ)Nw<n<x#HvxeAw7bPK%>NPOC>+
zy{yh#R$CL{F4fU(hueM~^YI$OkbH?X-TN1p?z4P1EHg1XILtyuMR%|HeKC5r_vP!%
zCq~|MG*`}FfPR+(t#u~=G7v|}e7%mEFIp_LDC;&N(Jsp>{a)Dib{Ln8yKznd&ER06
z3s?_g(7pc`aJAh>dYFDfvNyd?Su+R2ZSxB_HT$;Fp)|)Kw5ekx-k)6<l8I(L5d$Q_
zFb3)?qw?;pLYW<Z0qdx-rKAV6q|o5w%6$D|faX0~YP};vV{rd-|14Sw--7dsU|8R)
ztbU7ghV&)FiF*P<{TJ~Q8UfiEOHUvykJ&A&uQ~m?`bs|BpwZQldOpGBOY^49#Nb$3
z<Wx+%=`Hm)Vyx3S|Bn4v^+lmKx959bdfg-<Pecm5UJSvMhq<665%4~=A)wROGL#}L
zJuGyeVWi`I#8ow`K1fQpo&nq23OTt>k;NfNQ+88E@siZvfC%=_)pCB5o47RYQrV}2
z)6KF~ni&?61*R?LIs(Z@Ylf{wo+R|O5LqywI<@dg6SKh|KKFU_anpN)cxVBs3r_zG
zI1<x(7NoEO*}QbS^pVjHOo;+oK!D3Qo}%9T7@4kn7AJ-kPlZ25ofU8zdyL9xsogs$
z5y-^!J$Nd)4eFoKekCR~hSaDl>?Qk@&1_J}htR*$MRj&wIR3}`+#e?t_Y2lb>!1(Q
z(5D;Q2;^%Hq1~dpTstU#V~|}F7Jk-a%GE}#EXziw@JG$)Z%r@XW-Yx9X+N||>9jZ5
z%i5nlC=Q?eNb7I5Xhj9?5*4$AZm7YH4amw)Fj~P<CN#LTi9c=Y<ZFMGSmXCQ+mcHn
z*0aT1_Kbf4e?-1D$j?L+DAyl!8fBTM!9rVcLb6OJ_THuSsR?HHg5KN`rf~SJ&_%R-
z2dv5VGIhd}`Od1juQtn2-MG}q%w*d7Prr#xCjr~(3n*;Pi*Ai3%{h1uGSdpq$^>Fj
zpHeLK{XAT%v2)BpU##e`rx5h*z_4aOzygAOF=i^)wbyGO4m2HH>6YPL@hJWw!mzou
zrW#`meW6#ClwEDy4&4SG?I<d|^$KPCPKBzplzuMXT_y3YevwPI$|u1*zkijj&SB^x
z#I%9*<Md5?5}&|zS?ILPYH^B-gn^?lgb1AZ=}>>^tI5a*P0XD-@e)tdtDYiE{{nJK
zz(KL<Wy-hy0(geK-kgk>Pm?{M-#AMSg~#vaIB3=>FIDTJi*3q=D+~fe2M;UGj%Phu
z8K<K62Cy5mZG(9d{9Dn}_VX#9&94-IdAK8W<g6=Pn6^oe$9}I$udblqq-OFJ3(7Ck
zts3E+4M&FxdhBjsOg+)0z?p{#S$WZPGkksVJ<+ab3tY)ebv^&k8*`B#i^Xi4I^8-9
z`d|>_N+DLd@lBv|WKF>2?O<AUH6a}mBAf0{2|VEhSi|Ck%FJ<=_(|s^Ag<vjYTsUf
z)k>#_ZHDI%UKb(XMXbVWSDEX3o~d+TX2Z6uSJj#?e02YP4`NJXZmD^QXt$5*h<KQE
znaKG-&^X5+Vuq?WC=2q4NwNCP{<+Hc194PA{E|N8FCe3Yds07BAd^))He~M`Laq8N
zr`hB?P@j8Vvs&p5CU1(+r?N>lU{AK1xH?+?6WoP+<^K%_x?Emf9#z@C+bI8;_p)Fo
zqE4M;@K9UB`fUOuf7j8XkCqW&`vv+;I4t2jv})C1mFHt;qpFM*%bc3l*o%(e2V%fn
ze(z<?0N`dhNf~#RrQ?Z@84usrK@J}mnhKbvnVOG)yb~Q#4i^e|RdX&m+`B5gsm!&s
z!9+J#16XeLRft?m-A@YZG^<vgJW*d4_IZU>_y@~Jcj8q6_uTZ()enb(6imVlMAwgm
zeH>b9Jjb9}FCWX5&WCelpP<eClD#@SP7a_c#;zX_I<PGANHp0mhtiUH(H{tnF)=HP
zRQlM)cs#30`E5j7Eu4a?9iYkL-M+mkbb;Fn5zGc0xOS32#`6Sj3CtCu`nYpG;3M(L
z`@liU1FG-ONPa<~xApi8s9V#J9sr{TFA>UQeIqOuH2cW0<)B+Q4{`b%l=*gdeyXut
ztw&UcO+kYHizmY!qIqkL7L>@YN;B_(J%o`DrGK-xL*xGffDO>MQPKel9MGPImI3Rc
zce_iQ<%>^^zIHY&pBEEu&+am*y}VS`2<IVUz~~Dwl*<NDrsje@GuJzLyqvCf*GIYB
zAAbRb*EjLCez$Tpo+uU4I@Q){pD5J^*b-Rl-SP>%;w1zr{&IaaQ|MGXT_gTV1--8D
z;I67(59kWyJQo?8`>?vL<_J4iIbfztkKCPYHYWlhfW=sXjEj~Lvl5ZSS%I@@%lwT$
z<p}f@6BbW;oL;iJDHmu_bj8>qWsUU8^fQI)LXKgha@R4vY@bPn&HbBmoaR2zivzx!
zZx)meL{0?fg`*Qz8!eR1%Z>7U&rG#1-bA<S*}b`Yol2v1mS3MTNpelo2u|bP&-U)<
z*4ZpUN)D2#Vx$^g+Io8jYzJ#J5W2&00fHq4MBKaPf4GMj%&xw8aob`oxRgJhb1cjt
zaQ2)vFCtBYk6~_(uy2p116fwX0j8RC>s94*<}NC3j)DF#hki|$yqY8GwyvG7QHU=a
z6mE1qn%iY|u@CLgvUaEQ(d-U9_ye*Zr+MF-MQ}N@K{+BuUld%D8}gdB@(9SDCKc2Z
z-J4T}&6&F+|3;QwdPeu|wFiM?A3p~<1<6rvKFA3`9#q!&;?$zvU9i+Euk}yd6%Vyk
zDW^fO>C66QT`8o8Y!^i6u=zwKXntaTvVr9GE_O<9{gXcy8@+0Twql=TkF&IWoDy)m
zt>%Y+$#8-banQ8{SIJ*v=3|nHrAInQthh+<`Qrfr1kPY;w?~&uV^P_}<-~b0GW6FF
z_Pi*mhkU+W&jC$utK-?(#ysxG>13-Gfz0X!9dJ{oF0_Je`5j6$%dsNiRDn3Gh<Z=b
z6QTDEOCJXwQ#f~3X{Z^}4~+Qc3o7xDUpj;DOs+g^#iZy1NtoAv#=fs1uDM-FO$OmW
zmrujO|LDK(O~8{ZX0l)V8%>SgB~NQu@VDC?KZ3PrlR;U2pCtxb9~fj=dMB38q4d`R
zfPMiCi^?hE-Ymh|5oN6+8mlU7lXXiMhmyDq_6=%GgJCI6MA=x8a4gf6XR*;lSq<%7
zhNf+fldBgEPqPAN6e}+_7H*=~q)H}&!kdALE|PXya7@fpsVPhS-h5sF4FQ4YH><4>
z%t?J9K3U^@T5<zaxultpj^`rDm1pd{#6OAUl`?4-cia)xJ!<y>vA0*bAaG2YrOEqB
z;|OtlW6`LS^Gn|_vG=_oT$OG*!K|DziEFmlBHp_t$>-?Vpw_kLjV&y+yiL!otLruK
z(Jc5SB)G<;Zn9EkGbF&RFPBzb<}us~mLc2=($H4j((eSLjw(=b=!wcyFo!)qSAeCU
zER*B|#)@^B8bHx1VXMhYn*+U7Sru>5Xx_g2S(cr#9)!(a)<U9g=GXuUYT5G_lRV!y
zdvZv7XbTkDE~Zo-t)@V<3-v1~yVoBF!Jqf$)t{#u5W8w@<~g?vndGyeA7q%(GM#_K
zFrO4iQ*EETXYC6zggD+^J9j&uHg<hhyaQ@#>0B7(N4R^g!n-?oe)7TKYg*oI7wQeJ
zh*l8+lhQk6c2CP@;ij{(JROG>2x1u=xKf6ca12C@zqq4V-`v%6X#=S4-zqTqS%6Lg
z%r3mIJ}AQz$b$DOpTpfne@OA4_)x7{<fh$TQwdR`WuGO1FR0XsSLSam%I_RO!Y^r8
zjh7!jfCqQfsrjFLQ!XspZARY^2<wEx(ZR<Tm(7x`eewnKU`<{EKcn+|3JPS0Vb3)`
z0FGqXo`iaB;;SSLjU!pb0yVs7$0u_ur-G1*ezfYw-SpF0!TY20f$&g^I_YWs_LKv;
z(j|Ow3SccIkj7W_(cw$2rq=QE#&@Jv)n-#8E%cn$a~&|kuC-6I-AC2FBDd+@F%kjJ
z{*YZ96qKh}pN`|flf!G?NVwN(u4xMV$Lo^o(PWC8YZtCt2n69Qij}KiDVEF%m|ebw
zVoAq;2F2_nK&DrpwMzqv>;@V1c2Ot~HZqx9ay{f<?n%}EC({o#j>!H$d_UB<uKC$R
zr=pGMu1Q2Vo!&VuzI>k}mX>lTik!X^WNs|XCh^H0t`&A4^v~W4F>Zu+foqjbWQ|VV
zSnlbH`WpTlf4kKp?By7J)I}kp_*i>SgQf>-)y#YIVx5nwH?4u2COEnDt@a<Ci9VlT
zzH7}UR@+?uQ_><5rtBtGyR3sV-c?94rR`$a=i%E?1^tevT_)IQZv2pP`OOyOycT>u
zeOIt6ti)aB-tO>cORlU9I)74vj@u?%E3h=pWNTCdKV&x+;q0ru8R6|m`%ogLx|hl+
z(yU7uM(Ka39YM<=&OCjk9xek3b}~F_O{ElZLg0XPK=ogMU)X)huM0<?w2q1`;9cY6
ztpQ!McUwQ|u7}hVSt@Akt<A!P{3zeb_U{@T>E!>(P55e9xRgF{{ZIYTt3MWF4;Po}
zs3##FoVQATm^+99>-TVNl!rfJRkWamakC&eMwS7cly-&eHIIGP9uajeN(IxYC?w~9
znF*~GJT3KF<Xe(Ou)?(>g!^dM7p373;p%+ZPTuP>B_$XFLuYN<_X_WEVk_UPB<?e|
zn|O%z#>Wp^ulz=B3J#y*HedOTjA`HTJ2O}_m@nC=>P)vH?2D)*nqtG-D@NYnt;NvZ
z7Mcg+vvxCkV<?L=fin`ecUT<zj+{OEFdJtSW<bVP^xYAfj++1BcNJtioYjgxqpxI5
z>FHPpcKA5=l^9m)CTXO-NW8W&=&2~jkvoOj4!3G1e8=$4S+wnb4Krs*YJ!+^6>#NQ
zx{g#Iy7sz(RhD*?1Zv180Jqy&2XeZ~HSGIlMO4B<`o)L}oUQpC7j6OzUmTB6dI=j~
zIe9tbfLiM)ka(pd&>sKFU|P=cp;V}sjgp6_Z-tx0vskspS+U#$2`|OB?u7>Ryg}3&
zTcKGB`1(XuJ9%t4>cPRyUpzle3}~V1pdsM5ww{K=6=D^n@a@mLlQ_5&2cg!W=`@q!
z9SQYg6o4h8pqbf@;Ao<P1e3eECSWGP_L)QLiVc~o@&Vg~H3&2r9lhse0%hfRBg`0x
z)Cya364>D~BMo4`YVhDg2{r!3@`}nPL;5X1(*-TAY2FM<A(Gu(C+7YsU)H73ov){U
z`F-wTIU}B&2api?%truWnL??@Pkkq9J-ft6HYKW7(M6)_yJa_TsO&HU=sfprFE++x
zq;q?SFc=L6wOsK3hG=e?M3yBl(`A)*V2<=tJgsZ95tn@w8-5A7sK2#^{yONDBx0)x
zu^YWvMJ4W~+A0cf+7c>0E1L5eI6o1-Is;6p{|o3~GQK@@rDb9fPf){_v9#TX<X_=n
z`U{Xta?bNpHA=B{(w|F1OBKG%i+?(nZ+euHYwx%?`Uc4r!ds%MQ{I^J#WcrfWYwwL
zrC()`UXa~u!bN#CbIR8=f?w3Y90(fD^O|7=LZdBMt;g!xIO!8dkCdI-AKPU~fpV=u
zIB%F^@|%F%=(xp^j|5HTuWM}992#RwQ5w7T((s)I;FMMKK>B;1-qJp`7g;on3`XXs
z9-~L_Y}2+-^FO;}0Z1ajA~}4u3WVOMV3hZ9v(VWs-0Oz0nY`bvk`Z-t80ZJ*&hAzr
z61(%PG8ciI&ARQBdTm5o$+}^5a9h`bx)171aU1Nl`=|I3LLsoDu+t*%PTj}~&hER_
zbg_E!=`@?E-$8=)z}M$d{x-GhDyT*JEFhU9vK>5_0iTCLF{N?hvJcXA=O^QI!!oAC
z4DlB-?)CbL%D?MNo(ygo0mn)*{38>{VAqiMwdwT!qq-em+oMmKIa>>bXAH5g-pQd`
zXDqbE;-`CSd5e`x7yh}ONUZ&hJZc8oyhR8(R$lW|=68yqybf{M$dh~Gb)40iGxCPr
z#DoFhseoRKajLMn*Nf5@eF4|LB-4S7YSjR1WAEWfUSwf)4HlBi4b68!cZtOlenOEc
zCt<M3563n)o?d^NYdny*c6Mdy+aDVFntC)Pa=L16`r~V*U~@!oi|dTEy*07cDVSrD
zV3xni?u-(-=-+h{r^W`fMk4X&f+!+hFuToJ;+xls;(WCqT~uRVznvux`C8T{C}uIp
zQj3y=t&xHmZ4C3wMcLEHEL-Ab1n<{dY^_d-I7uc+1-Cwxrsb#zM5*cLovHGu2I?6f
zksk(w=|24UFV!CE-+(3ETQ&byR#bui{T}t-3@3UN%JWIEbC5&iL2U!|uIWG;dvtDk
zXoCCV7F?#!{zrl7KBuT?8mwH|BFY7o&)Lo_wub1i57sMyC5T85LbFC77hsioCjEM{
z8}2eE=jdjoQvB&VsPE0;HMah9B;=}f<<;v5#r-hT`<dlXJ4>Yt*#3is2IXodpS>N1
z_~DTFa}L~-`AS5K40@IAT9Y8AO$YxT4B478h@5d`k`F>bp0jS7&^;p)akmm{bE~IO
zExRrgZVoVClT7=<u^|8Z*{qQH4U$))!48rh%dC`@mG)TFu;!M}49d^;_%dPY*P?mz
zaQ5>LW+U?^u$O1W5rEw6pN(B6`KwdT`Ixg-9>RjPHP_*&@G|8xE}_BeBk$7G^B0_k
z0$Va)6g^zcjKvd3B;Pi#8(+U4x5`sHBk~$`68QDK3U4-G`0wzK3`;f(M6nrB*ml}3
zt#31onqLuHHK-ZJ*yWDA1ZLEno{j4Te1h~1HQ`7kUl)Bz<^#rzCV6>2Eu{w=KVTT<
zmPJ&8B?;l_zcPGIB!AImW<$M&CQ7|#^KNM>VJ@2riPjHj^I%vsSM&_OlLaHT#JB92
z`wwrG!r0BmjXktCUj-wPdu&1S>uyeVj73pA`YDFD5{@f^H*T7}(1zE+L8>LvYH``m
zFT`p02jh%VcT*|lR)QmRPL)>LB{zapDp!qp6~1^6FC_WKu6uqyxbB$VKK9(1bmZ_P
zQv>3oH^5i!2+~i~eV<BLGrZA<>NIQjZiDAioDz`Lif8`r)Aahe1j$cUj0$oKiH#Z4
zY?iLW>2wdvqpvu8G#t>(U#J907}h{@YzQ|K!Z);q6+FaI!E)Fxo#B?~mwUW;l70^2
z6MFKaDfJ<Wje|HdrDF}_ETrCU=vfpiyw*Cb-x)F9S{vn$^1N#E`XjS<elsGWw){>~
z*hV#UiPbB6#9!Q#mrc(bSC}e$4|l&ZI{Gj(bNfce?l|qbltECwlW)Vmx^Dv8(od5(
z;N)5gmM;p~PfwruIt&iOsOf4_C;Jyy8z;eS$3GJn%|W7_F0^d<6s@uBihW|UCXRky
zt(v6(_(oFCcq!ee1Nh6dqTP?W%YVRDPv{T6$a~RWSr`RtC_2{^wH|*U&$F5-q%XiH
zkjdVd0_dks4lj23viOq7&Haoa=O2q0_^+85q+FxCV0*k|&+ael0ukdWK`I9mN>1h+
zjSxfV@&qX?75(eHy8%vch-)By%2y48ZQBoqzN%Bx^KQ#z+tqx)%v_#>TarY29@|e@
z`ykz>c2si%gI=M+Tb;sS{$_^Y!@>FU;HD~t3&K{5pN!qqPO~@cWkra$AFAqw&~ImE
z_D&v|%foWC6(!x8aM4k*%S3gMc4t_WADK7`OMsTaR`0mR@t%8aP1X;F@9eqC1n}9H
zk)8~?JrVdgNx%RyFlPvjjy(*b@*r#;r?!&9l;AT1QsT)?BU90H*?Yhtd1LE(lKOG9
zekyw+^lUSnO{o4m<NOEkx9VfNe<9zhzcT($f`gyYei{+ABg*-1r+L0b``NuX7QXmN
z$4IF}`VN_!ViL<t0u^F!)X&e~@i&rn5jAL&gO)h~E4qDP^Dl0h`|2jPZ)d0zuaaL|
z5w&O_uG4268V43p?t?8J=$qdPSVD^7_o?eYEBk*a{SYN-h4~ykjIfRh4q^d~pL@bJ
zljg5<7sap|80u$!|9UZ4<>!#SlfAIQ2BBJWb6@5`MaA)6Nypj^`|lmC50F(n5>nx}
zjGBdTYG&L<b00Lgz@*#3!#QK7uu+_>EXZ_!!1BDT_v@NLWx}E-Nz1TGb5DRpCJwnP
z8Z83Nx-_E|ng}?dfAxxtjC$Gf*o`?Wc(nS}uTUO5Myi+(|NKnuCj8B6l_oihHx=KE
z;+uESJ??hyRs<rO8x}-xQpL)ImY#W6D8F-d>wO@dqGVH5K60S4NdW~x<K#*iJwpPh
zejRQk2Sa_;0jXB&de=t&3G1Rz@H8bM@)6XY=}LS146995r^mdhiO6xJtm`d-+VkWu
zwb|~&9pte-2cJQ6)op!I88hhMH^H#@P>s7zJG#bJfeiGjLSs2Tzu5-ho&w*5PGjWp
z#14{3Mh<?H>4+mf9ZmeWki!iXtV9)a`54PVJzs8gbM{Uz)aeXW=#XK^c+_(e191rj
z34O=clogGQnGU5Kh;t5!b_#OTLR~vWzWDno3HBWpp{p@{kJXrkw!jwP(&P$3pFufC
zp~BbJZHgV5DF51uYp(xPHIP?Jp>9z2oICPXju@8r?7$FCpp%(`)dD_~!2(?9WAsoO
z3@rfPj*%*w6l%UABm-=|iDTmLsxg-!LxvMr>Z+?}YeyP(u)x3{SI89sOUbm_5%NtE
zeAN+hU-l}vXBFZ&cahDiC!Q7^*i`xZn)_spr>bUmWHNF7vm@-K#}u(6KaX*6vst*8
zvEoTK-zoC_aX&Mwn&a&x+q}@5@8W0g#KF*%MdAx-^m$7jUMZ<1SgH70yW(Yitj%6j
z_TaG|h2`DnQ|iOK%S8*qroQm6RHu+h<#RFeYR})k`QxsnFc(WmFU`Pj`H$~~-)gom
zdrLHqxqRiZrOq$6{t6Sflk7w4T3aXwmkY{1;bMJQ;I`6e2-SV!%rtX*pnAA0-~0Qa
z$TQ*hfFW3hz#9jjm^4J|KlV{<3wqKah(?vYB}59S?6TqNqI2Qfe8+h3<vZB(pdDY=
zLJnPN{K<NQ5Qd4rT~c>LlMWYRdt#2nPucKu@P<zdydY9P+%K9+WHnF(D#~=?Di|?1
zvEiLP1u~~=nq`#+-O#%lU)V<Z14QXYOZg%Mj^AM9V@Eb&@fg|LJbKcQNGZeWJ6%N&
zZ+n#+klQ_cCntE@IY{xW8N7Z#N}B&d4!<Ns;rJi32Vo_9Ez=M8PElRIQO&kqe;V=`
zWhTpOr754c3eLoK0e5b@h!Wkv#L;C!R;CF{_Cc{PZK|-qG`dCHzk%{Wb{0C~=8)tg
zivoHwIR#6|YO`3x@Dbf<K_b1UUGw(h1y;oTJFR9(qJyy&*`K+X&v0A-8+Ydz`V$qA
z>8nu_1XJJF@U{=F)>81$X_j7+TW(2qbau_Vin{6J;PkMc`-SL7rXvZe^S>ZT)1d;`
z-srD#?r#c31F|2mP|~!61`th>_|t#RMf2To)8$h_%}v`Im%0ulDGDm*Nzq_e?$~!N
zU61$B7B*BEv^Kl5p4D}a$H>CFpjn=CniuBFJS1OKnW)xPA3Fq7yhhTeZ;38nd?a27
zX%($tAQGQ>Qe7QY9&;(~B$(dr%zqFIDg(>=k3$+aF@1vT7}ai1#+7?*bq?s^_Ueb~
z&lBA|2dWS@zBtN*7-j_MzCR4QGh#2Jv%6No^bv542PbPM;R65!z5RI6;RM&po->Rv
zJRyUj>$hYFgyISy09!LN`^-t95;#E`fOl7Cai-Ex!{|H^Mg+2DRD^4)_=LQR)lJ^)
zV{YwuF$?VB3w;uN)z5_iar69{OSm%h7}c!ZETv(lDgxi`io4eR(?)4lV8x~?9ph~=
z@oZ0wdur90GM72ZrD~*tyWLfo9LGbr=2lyY>4EliJCZ^6>4^F022K8w3-Bl6?+G0=
z%5wLZFQKv|Xs7OGBhWe-x{F;Mvc8gu=QEDH5j5M^?lM1fjL0v%%&<S?Dz$RW5&x6y
zs!7EN73j}EW8IoufB?A*7Z}$scO}N)bx1F!GC~HDQ8lM7^Z2l=smyUnheRGnU;V7n
z^{w<0?H|GypZzSGP*4qR!ReGab*#hqO{yy4EwRJ)_>i#|@k4=vM^HkG6YHjYjUKzT
z9BA;Iuuvc8Ux3c$o5ii;3&$EdyDwkei8Ka|Efs{nFNWU}E7TiqT!A&#?4}jwU0X8C
z+<Nh1?VZ_~4-2dOoNrYKgHbzE*=p<436F&+D8yfNvM^cstzTJ?>HR|>-?m~Zb74EB
z{xtP&`dW%hbZI7XWNoR1{t*K6<a6vIPbK$Qr-I<@rd00gZn~gh@em(h6JL3u|A5G&
zG)Gz9%=G%|gKYp{I{aqSw4N(w6UJ-ih8Encp%O}4LqDDu=Zae1BtoDGW(9FjrI-|_
z3Hh(5*U<J{5h@cYz~u4YpHZmT*}z(g0CU=4Y6186KPQ}o-M?u-km@;`O`KLESC
zSAQO-TL0#)tD!3mQY~s$Va8Br!l)FH^Uewhf&upUws(~<a83CKd8S@TH(5{i12$`Y
zHilApizm$KBKZvZyI$HT4^9qp^)b0bMX|hbraX$$kw0$#|M!VY=U?5HN7%|=z{S^p
M#<x?<>Hfa|FCEJ^ivR!s

literal 0
HcmV?d00001

diff --git a/tests/testfile_catalog.md b/tests/testfile_catalog.md
new file mode 100644
index 0000000..692daf8
--- /dev/null
+++ b/tests/testfile_catalog.md
@@ -0,0 +1,12 @@
+src_invalid
+===========
+
+-
+
+
+
+src_valid
+=========
+
+- Example.jpg: image/jpeg, obtained from wikipedia.org
+- blah.conf: text file with a .conf extension
\ No newline at end of file

From 61aa14c98d56362570b85ead89fd41c10202e346 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Thu, 23 Feb 2017 16:49:29 -0500
Subject: [PATCH 11/36] Change _write_log to _print_log

---
 bin/filecheck.py         | 15 ++++++++-------
 kittengroomer/helpers.py |  2 +-
 tests/test_filecheck.py  |  4 +++-
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 3b1a3a4..3f9231c 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -191,9 +191,9 @@ class KittenGroomerFileCheck(KittenGroomerBase):
                 dict_to_return[subtype] = method
         return dict_to_return
 
-    def _print_log(self):
+    def _write_log(self):
         """Print the logs related to the current file being processed."""
-        # TODO: change name to _write_log, move to helpers
+        # TODO: move to helpers
         tmp_log = self.logger.log.fields(**self.cur_file.log_details)
         if self.cur_file.is_dangerous():
             tmp_log.warning(self.cur_file.log_string)
@@ -205,7 +205,6 @@ class KittenGroomerFileCheck(KittenGroomerBase):
     def _run_process(self, command_string, timeout=None):
         """Run command_string in a subprocess, wait until it finishes."""
         args = shlex.split(command_string)
-        # TODO: log_debug_err and log_debug are now broken, fix, move to helpers
         with open(self.logger.log_debug_err, 'ab') as stderr, open(self.logger.log_debug_out, 'ab') as stdout:
             try:
                 subprocess.check_call(args, stdout=stdout, stderr=stderr, timeout=timeout)
@@ -560,18 +559,18 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         else:
             self._safe_copy()
         if not self.cur_file.is_recursive:
-            self._print_log()
+            self._write_log()
 
     def processdir(self, src_dir=None, dst_dir=None):
         """Main function coordinating file processing."""
-        # TODO: do we need defaults here?
+        # TODO: do we actually need defaults here?
         if src_dir is None:
             src_dir = self.src_root_dir
         if dst_dir is None:
             dst_dir = self.dst_root_dir
 
         if self.recursive_archive_depth > 0:
-            self._print_log()
+            self._write_log()
 
         if self.recursive_archive_depth >= self.max_recursive_depth:
             self._handle_archivebomb(src_dir)
@@ -579,9 +578,11 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         for srcpath in self._list_all_files(src_dir):
             dstpath = srcpath.replace(src_dir, dst_dir)
             relative_path = srcpath.replace(src_dir + '/', '')
-            # which path do we want in the log?
             self.process_file(srcpath, dstpath, relative_path)
 
+    def process_archive(self, src_dir, dst_dir):
+        pass
+
 
 if __name__ == '__main__':
     main(KittenGroomerFileCheck, 'File sanitizer used in CIRCLean. Renames potentially dangerous files.')
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 3455bbd..a19b3f4 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -286,7 +286,7 @@ class KittenGroomerBase(object):
 
     #######################
 
-    # TODO: feels like this funciton doesn't need to exist if we move main()
+    # TODO: feels like this function doesn't need to exist if we move main()
     def processdir(self, src_dir, dst_dir):
         """
         Implement this function in your subclass to define file processing behavior.
diff --git a/tests/test_filecheck.py b/tests/test_filecheck.py
index ac7cf42..d53eded 100644
--- a/tests/test_filecheck.py
+++ b/tests/test_filecheck.py
@@ -45,4 +45,6 @@ class TestIntegration:
 
 
 class TestFileHandling:
-    pass
+    def test_autorun(self):
+        # Run on a single autorun file, confirm that it gets flagged as dangerous
+        pass

From cfeccc25614089e5afbbc745462ed293c2fb07f7 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Thu, 23 Feb 2017 17:39:16 -0500
Subject: [PATCH 12/36] Move main() into filecheck

- Also change the name of processdir to process_dir
---
 bin/filecheck.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 3f9231c..d1f6033 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -5,6 +5,7 @@ import mimetypes
 import shlex
 import subprocess
 import zipfile
+import argparse
 
 import oletools.oleid
 import olefile
@@ -15,7 +16,7 @@ from PIL import Image
 # from PIL import PngImagePlugin
 from pdfid import PDFiD, cPDFiD
 
-from kittengroomer import FileBase, KittenGroomerBase, main
+from kittengroomer import FileBase, KittenGroomerBase
 
 
 SEVENZ_PATH = '/usr/bin/7z'
@@ -389,7 +390,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
 
     def _archive(self):
         """Processes an archive using 7zip. The archive is extracted to a
-        temporary directory and self.processdir is called on that directory.
+        temporary directory and self.process_dir is called on that directory.
         The recursive archive depth is increased to protect against archive
         bombs."""
         self.cur_file.add_log_details('processing_type', 'archive')
@@ -401,7 +402,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         self._run_process(extract_command)
         self.recursive_archive_depth += 1
         self.logger.tree(tmpdir)
-        self.processdir(tmpdir, self.cur_file.dst_path)
+        self.process_dir(tmpdir, self.cur_file.dst_path)
         self.recursive_archive_depth -= 1
         self._safe_rmtree(tmpdir)
 
@@ -561,14 +562,8 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         if not self.cur_file.is_recursive:
             self._write_log()
 
-    def processdir(self, src_dir=None, dst_dir=None):
+    def process_dir(self, src_dir, dst_dir):
         """Main function coordinating file processing."""
-        # TODO: do we actually need defaults here?
-        if src_dir is None:
-            src_dir = self.src_root_dir
-        if dst_dir is None:
-            dst_dir = self.dst_root_dir
-
         if self.recursive_archive_depth > 0:
             self._write_log()
 
@@ -580,8 +575,17 @@ class KittenGroomerFileCheck(KittenGroomerBase):
             relative_path = srcpath.replace(src_dir + '/', '')
             self.process_file(srcpath, dstpath, relative_path)
 
-    def process_archive(self, src_dir, dst_dir):
-        pass
+    def run(self):
+        self.process_dir(self.src_root_dir, self.src_dst_dir)
+
+
+def main(kg_implementation, description):
+    parser = argparse.ArgumentParser(prog='KittenGroomer', description=description)
+    parser.add_argument('-s', '--source', type=str, help='Source directory')
+    parser.add_argument('-d', '--destination', type=str, help='Destination directory')
+    args = parser.parse_args()
+    kg = kg_implementation(args.source, args.destination)
+    kg.run()
 
 
 if __name__ == '__main__':

From bfa257534a0627d3c1bce08f0838f2658c9ae05d Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Thu, 23 Feb 2017 18:55:46 -0500
Subject: [PATCH 13/36] Remove old data/ path stuff from helpers.py

---
 kittengroomer/helpers.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index a19b3f4..b20e6d1 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -240,9 +240,6 @@ class KittenGroomerBase(object):
         self.debug = debug
         self.cur_file = None
         self.logger = GroomerLog(self.dst_root_dir, debug)
-        # Add data/ to PATH
-        # self.resources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
-        # os.environ["PATH"] += os.pathsep + self.resources_path
 
     # ##### Helpers #####
     def _safe_rmtree(self, directory):

From c6ecc5e3a3d1011d7d79afd62b69f9dcfd7c008b Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Thu, 23 Feb 2017 23:09:00 -0500
Subject: [PATCH 14/36] Fix process_dir bug in filecheck tests

---
 bin/filecheck.py         | 2 +-
 kittengroomer/helpers.py | 2 +-
 tests/test_filecheck.py  | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index d1f6033..54be51a 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -576,7 +576,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
             self.process_file(srcpath, dstpath, relative_path)
 
     def run(self):
-        self.process_dir(self.src_root_dir, self.src_dst_dir)
+        self.process_dir(self.src_root_dir, self.dst_root_dir)
 
 
 def main(kg_implementation, description):
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index b20e6d1..e23d89d 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -60,7 +60,7 @@ class FileBase(object):
         else:
             try:
                 mt = magic.from_file(self.src_path, mime=True)
-                # magic will always return something, even if it's just 'data'
+                # note: magic will always return something, even if it's just 'data'
             except UnicodeEncodeError as e:
                 # FIXME: The encoding of the file is broken (possibly UTF-16)
                 mt = ''
diff --git a/tests/test_filecheck.py b/tests/test_filecheck.py
index d53eded..b573a64 100644
--- a/tests/test_filecheck.py
+++ b/tests/test_filecheck.py
@@ -33,13 +33,13 @@ class TestIntegration:
 
     def test_filecheck(self, src_invalid, dst):
         groomer = KittenGroomerFileCheck(src_invalid, dst, debug=True)
-        groomer.processdir()
+        groomer.run()
         test_description = "filecheck_invalid"
         save_logs(groomer, test_description)
 
     def test_filecheck_2(self, src_valid, dst):
         groomer = KittenGroomerFileCheck(src_valid, dst, debug=True)
-        groomer.processdir()
+        groomer.run()
         test_description = "filecheck_valid"
         save_logs(groomer, test_description)
 

From 4a99ba900a1266aef52a35d9c037ca7ec359a1ec Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Fri, 24 Feb 2017 10:41:59 -0500
Subject: [PATCH 15/36] Make self.get_extension return a value

---
 kittengroomer/helpers.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index e23d89d..7e2a433 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -45,13 +45,13 @@ class FileBase(object):
         # TODO: rename this. file_information? file_data? file_metadata? file_log?
         self.log_details = {'filepath': self.src_path}
         self.log_string = ''
-        self._determine_extension()
+        self.extension = self._determine_extension()
         self._determine_mimetype()
         self.logger = logger
 
     def _determine_extension(self):
         _, ext = os.path.splitext(self.src_path)
-        self.extension = ext.lower()
+        return ext.lower()
 
     def _determine_mimetype(self):
         if os.path.islink(self.src_path):
@@ -60,7 +60,7 @@ class FileBase(object):
         else:
             try:
                 mt = magic.from_file(self.src_path, mime=True)
-                # note: magic will always return something, even if it's just 'data'
+                # Note: magic will always return something, even if it's just 'data'
             except UnicodeEncodeError as e:
                 # FIXME: The encoding of the file is broken (possibly UTF-16)
                 mt = ''
@@ -223,8 +223,9 @@ class GroomerLog(object):
         return s.hexdigest()
 
     def add_file(self):
+        # File object will add itself
+        # return a sublog for the file
         pass
-        # Should this return a sublog that the file can then write to? Does that work? Too confusing to understand?
 
     def add_event(self, event, log_level):
         pass

From 3d36c90d6612ca0ca2386b38d5073fe5ba040eb7 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Fri, 24 Feb 2017 10:43:42 -0500
Subject: [PATCH 16/36] Make list_all_files a public method

---
 bin/filecheck.py            | 2 +-
 examples/generic.py         | 2 +-
 examples/pier9.py           | 2 +-
 examples/specific.py        | 2 +-
 kittengroomer/helpers.py    | 3 +--
 tests/test_kittengroomer.py | 2 +-
 6 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 54be51a..a7e8ab5 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -570,7 +570,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         if self.recursive_archive_depth >= self.max_recursive_depth:
             self._handle_archivebomb(src_dir)
 
-        for srcpath in self._list_all_files(src_dir):
+        for srcpath in self.list_all_files(src_dir):
             dstpath = srcpath.replace(src_dir, dst_dir)
             relative_path = srcpath.replace(src_dir + '/', '')
             self.process_file(srcpath, dstpath, relative_path)
diff --git a/examples/generic.py b/examples/generic.py
index e76fccd..220b3db 100644
--- a/examples/generic.py
+++ b/examples/generic.py
@@ -339,7 +339,7 @@ class KittenGroomer(KittenGroomerBase):
                 archbomb_path = src_dir[:-len('_temp')]
                 self._safe_remove(archbomb_path)
 
-        for srcpath in self._list_all_files(src_dir):
+        for srcpath in self.list_all_files(src_dir):
             self.cur_file = File(srcpath, srcpath.replace(src_dir, dst_dir))
 
             self.log_name.info('Processing {} ({}/{})', srcpath.replace(src_dir + '/', ''),
diff --git a/examples/pier9.py b/examples/pier9.py
index 6ded725..9252c4f 100644
--- a/examples/pier9.py
+++ b/examples/pier9.py
@@ -54,7 +54,7 @@ class KittenGroomerPier9(KittenGroomerBase):
         '''
             Main function doing the processing
         '''
-        for srcpath in self._list_all_files(self.src_root_dir):
+        for srcpath in self.list_all_files(self.src_root_dir):
             self.log_name.info('Processing {}', srcpath.replace(self.src_root_dir + '/', ''))
             self.cur_file = FilePier9(srcpath, srcpath.replace(self.src_root_dir, self.dst_root_dir))
             if not self.cur_file.is_dangerous() and self.cur_file.extension in self.authorized_extensions:
diff --git a/examples/specific.py b/examples/specific.py
index fcca2f4..724ba35 100644
--- a/examples/specific.py
+++ b/examples/specific.py
@@ -54,7 +54,7 @@ class KittenGroomerSpec(KittenGroomerBase):
         '''
         to_copy = []
         error = []
-        for srcpath in self._list_all_files(self.src_root_dir):
+        for srcpath in self.list_all_files(self.src_root_dir):
             valid = True
             self.log_name.info('Processing {}', srcpath.replace(self.src_root_dir + '/', ''))
             self.cur_file = FileSpec(srcpath, srcpath.replace(self.src_root_dir, self.dst_root_dir))
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 7e2a433..9f7cced 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -274,8 +274,7 @@ class KittenGroomerBase(object):
             print(e)
             return False
 
-    # TODO: this isn't a private method, change and edit the groomers as well
-    def _list_all_files(self, directory):
+    def list_all_files(self, directory):
         """Generator yielding path to all of the files in a directory tree."""
         for root, dirs, files in os.walk(directory):
             for filename in files:
diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py
index bd16c5a..8339e84 100644
--- a/tests/test_kittengroomer.py
+++ b/tests/test_kittengroomer.py
@@ -271,7 +271,7 @@ class TestKittenGroomerBase:
         testdir = tmpdir.join('testdir')
         os.mkdir(testdir.strpath)
         simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath)
-        files = simple_groomer._list_all_files(simple_groomer.src_root_dir)
+        files = simple_groomer.list_all_files(simple_groomer.src_root_dir)
         assert file.strpath in files
         assert testdir.strpath not in files
 

From 53c1598af8e9c351d8b59b0c3dd300f952a9f296 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 27 Feb 2017 13:22:24 -0600
Subject: [PATCH 17/36] Move file processing methods into File object

- It seems like filecheck will be easier to reason about if all of
the file processing stuff happens in the File object. The Groomer
object will now be responsible only for enumerating the files to
be processed.
- Tests won't pass for this commit, but wanted to make the diff
cleaner but committing this before making changes.
---
 bin/filecheck.py | 97 +++++++++++++++++++++++++-----------------------
 1 file changed, 50 insertions(+), 47 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index a7e8ab5..8f941f7 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -96,6 +96,38 @@ class File(FileBase):
         self._check_extension()
         self._check_mime()
 
+        subtypes_apps = [
+            (Config.mimes_office, self._winoffice),
+            (Config.mimes_ooxml, self._ooxml),
+            (Config.mimes_rtf, self.text),
+            (Config.mimes_libreoffice, self._libreoffice),
+            (Config.mimes_pdf, self._pdf),
+            (Config.mimes_xml, self.text),
+            (Config.mimes_ms, self._executables),
+            (Config.mimes_compressed, self._archive),
+            (Config.mimes_data, self._binary_app),
+        ]
+        self.app_subtype_methods = self._make_method_dict(subtypes_apps)
+
+        types_metadata = [
+            (Config.mimes_exif, self._metadata_exif),
+            (Config.mimes_png, self._metadata_png),
+        ]
+        self.metadata_mimetype_methods = self._make_method_dict(types_metadata)
+
+        self.mime_processing_options = {
+            'text': self.text,
+            'audio': self.audio,
+            'image': self.image,
+            'video': self.video,
+            'application': self.application,
+            'example': self.example,
+            'message': self.message,
+            'model': self.model,
+            'multipart': self.multipart,
+            'inode': self.inode,
+        }
+
     def _check_dangerous(self):
         if not self.has_mimetype():
             # No mimetype, should not happen.
@@ -137,51 +169,8 @@ class File(FileBase):
                 self.log_details.update({'expected_extensions': expected_extensions})
                 self.make_dangerous()
 
-    def has_metadata(self):
-        if self.mimetype in Config.mimes_metadata:
-            return True
-        return False
-
-
-class KittenGroomerFileCheck(KittenGroomerBase):
-
-    def __init__(self, root_src, root_dst, max_recursive_depth=2, debug=False):
-        super(KittenGroomerFileCheck, self).__init__(root_src, root_dst, debug)
-        self.recursive_archive_depth = 0
-        self.max_recursive_depth = max_recursive_depth
-        self.log_name = self.logger.log
-
-        subtypes_apps = [
-            (Config.mimes_office, self._winoffice),
-            (Config.mimes_ooxml, self._ooxml),
-            (Config.mimes_rtf, self.text),
-            (Config.mimes_libreoffice, self._libreoffice),
-            (Config.mimes_pdf, self._pdf),
-            (Config.mimes_xml, self.text),
-            (Config.mimes_ms, self._executables),
-            (Config.mimes_compressed, self._archive),
-            (Config.mimes_data, self._binary_app),
-        ]
-        self.app_subtype_methods = self._make_method_dict(subtypes_apps)
-
-        types_metadata = [
-            (Config.mimes_exif, self._metadata_exif),
-            (Config.mimes_png, self._metadata_png),
-        ]
-        self.metadata_mimetype_methods = self._make_method_dict(types_metadata)
-
-        self.mime_processing_options = {
-            'text': self.text,
-            'audio': self.audio,
-            'image': self.image,
-            'video': self.video,
-            'application': self.application,
-            'example': self.example,
-            'message': self.message,
-            'model': self.model,
-            'multipart': self.multipart,
-            'inode': self.inode,
-        }
+    def check(self):
+        pass
 
     # ##### Helper functions #####
     def _make_method_dict(self, list_of_tuples):
@@ -194,7 +183,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
 
     def _write_log(self):
         """Print the logs related to the current file being processed."""
-        # TODO: move to helpers
+        # TODO: move to helpers?
         tmp_log = self.logger.log.fields(**self.cur_file.log_details)
         if self.cur_file.is_dangerous():
             tmp_log.warning(self.cur_file.log_string)
@@ -203,6 +192,11 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         else:
             tmp_log.debug(self.cur_file.log_string)
 
+    def has_metadata(self):
+        if self.mimetype in Config.mimes_metadata:
+            return True
+        return False
+
     def _run_process(self, command_string, timeout=None):
         """Run command_string in a subprocess, wait until it finishes."""
         args = shlex.split(command_string)
@@ -547,7 +541,14 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         self.cur_file.log_string += 'Image file'
         self.cur_file.add_log_details('processing_type', 'image')
 
-    #######################
+
+class KittenGroomerFileCheck(KittenGroomerBase):
+
+    def __init__(self, root_src, root_dst, max_recursive_depth=2, debug=False):
+        super(KittenGroomerFileCheck, self).__init__(root_src, root_dst, debug)
+        self.recursive_archive_depth = 0
+        self.max_recursive_depth = max_recursive_depth
+        self.log_name = self.logger.log
 
     def process_file(self, srcpath, dstpath, relative_path):
         self.cur_file = File(srcpath, dstpath, self.logger)
@@ -555,6 +556,8 @@ class KittenGroomerFileCheck(KittenGroomerBase):
                            relative_path,
                            self.cur_file.main_type,
                            self.cur_file.sub_type)
+        self.cur_file.check()
+
         if not self.cur_file.is_dangerous():
             self.mime_processing_options.get(self.cur_file.main_type, self.unknown)()
         else:

From 9aafe6e518ba63067db8781f9a12ccceefc9d297 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 27 Feb 2017 14:10:56 -0600
Subject: [PATCH 18/36] Remove cur_file from methods in File object

---
 bin/filecheck.py         | 223 +++++++++++++++++++--------------------
 kittengroomer/helpers.py |   2 +-
 2 files changed, 112 insertions(+), 113 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 8f941f7..bda25dd 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -170,7 +170,8 @@ class File(FileBase):
                 self.make_dangerous()
 
     def check(self):
-        pass
+        if not self.is_dangerous():
+            self.mime_processing_options.get(self.main_type, self.unknown)()
 
     # ##### Helper functions #####
     def _make_method_dict(self, list_of_tuples):
@@ -184,14 +185,15 @@ class File(FileBase):
     def _write_log(self):
         """Print the logs related to the current file being processed."""
         # TODO: move to helpers?
-        tmp_log = self.logger.log.fields(**self.cur_file.log_details)
-        if self.cur_file.is_dangerous():
-            tmp_log.warning(self.cur_file.log_string)
-        elif self.cur_file.log_details.get('unknown') or self.cur_file.log_details.get('binary'):
-            tmp_log.info(self.cur_file.log_string)
+        tmp_log = self.logger.log.fields(**self.log_details)
+        if self.is_dangerous():
+            tmp_log.warning(self.log_string)
+        elif self.log_details.get('unknown') or self.log_details.get('binary'):
+            tmp_log.info(self.log_string)
         else:
-            tmp_log.debug(self.cur_file.log_string)
+            tmp_log.debug(self.log_string)
 
+    # Make this an @property
     def has_metadata(self):
         if self.mimetype in Config.mimes_metadata:
             return True
@@ -211,198 +213,197 @@ class File(FileBase):
     # ##### Discarded mimetypes, reason in the docstring ######
     def inode(self):
         """Empty file or symlink."""
-        if self.cur_file.is_symlink():
-            self.cur_file.log_string += 'Symlink to {}'.format(self.cur_file.log_details['symlink'])
+        if self.is_symlink():
+            self.log_string += 'Symlink to {}'.format(self.log_details['symlink'])
         else:
-            self.cur_file.log_string += 'Inode file'
+            self.log_string += 'Inode file'
 
     def unknown(self):
         """Main type should never be unknown."""
-        self.cur_file.log_string += 'Unknown file'
+        self.log_string += 'Unknown file'
 
     def example(self):
         """Used in examples, should never be returned by libmagic."""
-        self.cur_file.log_string += 'Example file'
+        self.log_string += 'Example file'
 
     def multipart(self):
         """Used in web apps, should never be returned by libmagic"""
-        self.cur_file.log_string += 'Multipart file'
+        self.log_string += 'Multipart file'
 
     # ##### Treated as malicious, no reason to have it on a USB key ######
     def message(self):
         """Process a message file."""
-        self.cur_file.log_string += 'Message file'
-        self.cur_file.make_dangerous()
+        self.log_string += 'Message file'
+        self.make_dangerous()
         self._safe_copy()
 
     def model(self):
         """Process a model file."""
-        self.cur_file.log_string += 'Model file'
-        self.cur_file.make_dangerous()
+        self.log_string += 'Model file'
+        self.make_dangerous()
         self._safe_copy()
 
     # ##### Files that will be converted ######
     def text(self):
         """Process an rtf, ooxml, or plaintext file."""
         for mt in Config.mimes_rtf:
-            if mt in self.cur_file.sub_type:
-                self.cur_file.log_string += 'Rich Text file'
+            if mt in self.sub_type:
+                self.log_string += 'Rich Text file'
                 # TODO: need a way to convert it to plain text
-                self.cur_file.force_ext('.txt')
+                self.force_ext('.txt')
                 self._safe_copy()
                 return
         for mt in Config.mimes_ooxml:
-            if mt in self.cur_file.sub_type:
-                self.cur_file.log_string += 'OOXML File'
+            if mt in self.sub_type:
+                self.log_string += 'OOXML File'
                 self._ooxml()
                 return
-        self.cur_file.log_string += 'Text file'
-        self.cur_file.force_ext('.txt')
+        self.log_string += 'Text file'
+        self.force_ext('.txt')
         self._safe_copy()
 
     def application(self):
         """Processes an application specific file according to its subtype."""
         for subtype, method in self.app_subtype_methods.items():
-            if subtype in self.cur_file.sub_type:
+            if subtype in self.sub_type:
                 method()
-                self.cur_file.log_string += 'Application file'
+                self.log_string += 'Application file'
                 return
-        self.cur_file.log_string += 'Unknown Application file'
+        self.log_string += 'Unknown Application file'
         self._unknown_app()
 
     def _executables(self):
         """Processes an executable file."""
-        self.cur_file.add_log_details('processing_type', 'executable')
-        self.cur_file.make_dangerous()
+        self.add_log_details('processing_type', 'executable')
+        self.make_dangerous()
         self._safe_copy()
 
     def _winoffice(self):
         """Processes a winoffice file using olefile/oletools."""
-        self.cur_file.add_log_details('processing_type', 'WinOffice')
+        self.add_log_details('processing_type', 'WinOffice')
         # Try as if it is a valid document
-        oid = oletools.oleid.OleID(self.cur_file.src_path)
-        if not olefile.isOleFile(self.cur_file.src_path):
+        oid = oletools.oleid.OleID(self.src_path)
+        if not olefile.isOleFile(self.src_path):
             # Manual processing, may already count as suspicious
             try:
-                ole = olefile.OleFileIO(self.cur_file.src_path, raise_defects=olefile.DEFECT_INCORRECT)
+                ole = olefile.OleFileIO(self.src_path, raise_defects=olefile.DEFECT_INCORRECT)
             except:
-                self.cur_file.add_log_details('not_parsable', True)
-                self.cur_file.make_dangerous()
+                self.add_log_details('not_parsable', True)
+                self.make_dangerous()
             if ole.parsing_issues:
-                self.cur_file.add_log_details('parsing_issues', True)
-                self.cur_file.make_dangerous()
+                self.add_log_details('parsing_issues', True)
+                self.make_dangerous()
             else:
                 if ole.exists('macros/vba') or ole.exists('Macros') \
                         or ole.exists('_VBA_PROJECT_CUR') or ole.exists('VBA'):
-                    self.cur_file.add_log_details('macro', True)
-                    self.cur_file.make_dangerous()
+                    self.add_log_details('macro', True)
+                    self.make_dangerous()
         else:
             indicators = oid.check()
             # Encrypted ban be set by multiple checks on the script
             if oid.encrypted.value:
-                self.cur_file.add_log_details('encrypted', True)
-                self.cur_file.make_dangerous()
+                self.add_log_details('encrypted', True)
+                self.make_dangerous()
             if oid.macros.value or oid.ole.exists('macros/vba') or oid.ole.exists('Macros') \
                     or oid.ole.exists('_VBA_PROJECT_CUR') or oid.ole.exists('VBA'):
-                self.cur_file.add_log_details('macro', True)
-                self.cur_file.make_dangerous()
+                self.add_log_details('macro', True)
+                self.make_dangerous()
             for i in indicators:
                 if i.id == 'ObjectPool' and i.value:
                     # FIXME: Is it suspicious?
-                    self.cur_file.add_log_details('objpool', True)
+                    self.add_log_details('objpool', True)
                 elif i.id == 'flash' and i.value:
-                    self.cur_file.add_log_details('flash', True)
-                    self.cur_file.make_dangerous()
+                    self.add_log_details('flash', True)
+                    self.make_dangerous()
         self._safe_copy()
 
     def _ooxml(self):
         """Processes an ooxml file."""
-        self.cur_file.add_log_details('processing_type', 'ooxml')
+        self.add_log_details('processing_type', 'ooxml')
         try:
-            doc = officedissector.doc.Document(self.cur_file.src_path)
+            doc = officedissector.doc.Document(self.src_path)
         except Exception:
             # Invalid file
-            self.cur_file.make_dangerous()
+            self.make_dangerous()
             self._safe_copy()
             return
         # There are probably other potentially malicious features:
         # fonts, custom props, custom XML
         if doc.is_macro_enabled or len(doc.features.macros) > 0:
-            self.cur_file.add_log_details('macro', True)
-            self.cur_file.make_dangerous()
+            self.add_log_details('macro', True)
+            self.make_dangerous()
         if len(doc.features.embedded_controls) > 0:
-            self.cur_file.add_log_details('activex', True)
-            self.cur_file.make_dangerous()
+            self.add_log_details('activex', True)
+            self.make_dangerous()
         if len(doc.features.embedded_objects) > 0:
             # Exploited by CVE-2014-4114 (OLE)
-            self.cur_file.add_log_details('embedded_obj', True)
-            self.cur_file.make_dangerous()
+            self.add_log_details('embedded_obj', True)
+            self.make_dangerous()
         if len(doc.features.embedded_packages) > 0:
-            self.cur_file.add_log_details('embedded_pack', True)
-            self.cur_file.make_dangerous()
+            self.add_log_details('embedded_pack', True)
+            self.make_dangerous()
         self._safe_copy()
 
     def _libreoffice(self):
         """Processes a libreoffice file."""
-        self.cur_file.add_log_details('processing_type', 'libreoffice')
+        self.add_log_details('processing_type', 'libreoffice')
         # As long as there ar no way to do a sanity check on the files => dangerous
         try:
-            lodoc = zipfile.ZipFile(self.cur_file.src_path, 'r')
+            lodoc = zipfile.ZipFile(self.src_path, 'r')
         except:
-            self.cur_file.add_log_details('invalid', True)
-            self.cur_file.make_dangerous()
+            self.add_log_details('invalid', True)
+            self.make_dangerous()
         for f in lodoc.infolist():
             fname = f.filename.lower()
             if fname.startswith('script') or fname.startswith('basic') or \
                     fname.startswith('object') or fname.endswith('.bin'):
-                self.cur_file.add_log_details('macro', True)
-                self.cur_file.make_dangerous()
+                self.add_log_details('macro', True)
+                self.make_dangerous()
         self._safe_copy()
 
     def _pdf(self):
         """Processes a PDF file."""
-        self.cur_file.add_log_details('processing_type', 'pdf')
-        xmlDoc = PDFiD(self.cur_file.src_path)
+        self.add_log_details('processing_type', 'pdf')
+        xmlDoc = PDFiD(self.src_path)
         oPDFiD = cPDFiD(xmlDoc, True)
         # TODO: other keywords?
         if oPDFiD.encrypt.count > 0:
-            self.cur_file.add_log_details('encrypted', True)
-            self.cur_file.make_dangerous()
+            self.add_log_details('encrypted', True)
+            self.make_dangerous()
         if oPDFiD.js.count > 0 or oPDFiD.javascript.count > 0:
-            self.cur_file.add_log_details('javascript', True)
-            self.cur_file.make_dangerous()
+            self.add_log_details('javascript', True)
+            self.make_dangerous()
         if oPDFiD.aa.count > 0 or oPDFiD.openaction.count > 0:
-            self.cur_file.add_log_details('openaction', True)
-            self.cur_file.make_dangerous()
+            self.add_log_details('openaction', True)
+            self.make_dangerous()
         if oPDFiD.richmedia.count > 0:
-            self.cur_file.add_log_details('flash', True)
-            self.cur_file.make_dangerous()
+            self.add_log_details('flash', True)
+            self.make_dangerous()
         if oPDFiD.launch.count > 0:
-            self.cur_file.add_log_details('launch', True)
-            self.cur_file.make_dangerous()
-        self._safe_copy()
+            self.add_log_details('launch', True)
+            self.make_dangerous()
 
     def _archive(self):
         """Processes an archive using 7zip. The archive is extracted to a
         temporary directory and self.process_dir is called on that directory.
         The recursive archive depth is increased to protect against archive
         bombs."""
-        self.cur_file.add_log_details('processing_type', 'archive')
-        self.cur_file.is_recursive = True
-        self.cur_file.log_string += 'Archive extracted, processing content.'
-        tmpdir = self.cur_file.dst_path + '_temp'
+        self.add_log_details('processing_type', 'archive')
+        self.is_recursive = True
+        self.log_string += 'Archive extracted, processing content.'
+        tmpdir = self.dst_path + '_temp'
         self._safe_mkdir(tmpdir)
-        extract_command = '{} -p1 x "{}" -o"{}" -bd -aoa'.format(SEVENZ_PATH, self.cur_file.src_path, tmpdir)
+        extract_command = '{} -p1 x "{}" -o"{}" -bd -aoa'.format(SEVENZ_PATH, self.src_path, tmpdir)
         self._run_process(extract_command)
         self.recursive_archive_depth += 1
         self.logger.tree(tmpdir)
-        self.process_dir(tmpdir, self.cur_file.dst_path)
+        self.process_dir(tmpdir, self.dst_path)
         self.recursive_archive_depth -= 1
         self._safe_rmtree(tmpdir)
 
     def _handle_archivebomb(self, src_dir):
-        self.cur_file.make_dangerous()
-        self.cur_file.add_log_details('Archive Bomb', True)
+        self.make_dangerous()
+        self.add_log_details('Archive Bomb', True)
         self.log_name.warning('ARCHIVE BOMB.')
         self.log_name.warning('The content of the archive contains recursively other archives.')
         self.log_name.warning('This is a bad sign so the archive is not extracted to the destination key.')
@@ -413,30 +414,30 @@ class File(FileBase):
 
     def _unknown_app(self):
         """Processes an unknown file."""
-        self.cur_file.make_unknown()
+        self.make_unknown()
         self._safe_copy()
 
     def _binary_app(self):
         """Processses an unknown binary file."""
-        self.cur_file.make_binary()
+        self.make_binary()
         self._safe_copy()
 
     #######################
     # Metadata extractors
     def _metadata_exif(self, metadata_file_path):
-        img = open(self.cur_file.src_path, 'rb')
+        img = open(self.src_path, 'rb')
         tags = None
 
         try:
             tags = exifread.process_file(img, debug=True)
         except Exception as e:
-            print("Error while trying to grab full metadata for file {}; retrying for partial data.".format(self.cur_file.src_path))
+            print("Error while trying to grab full metadata for file {}; retrying for partial data.".format(self.src_path))
             print(e)
         if tags is None:
             try:
                 tags = exifread.process_file(img, debug=True)
             except Exception as e:
-                print("Failed to get any metadata for file {}.".format(self.cur_file.src_path))
+                print("Failed to get any metadata for file {}.".format(self.src_path))
                 print(e)
                 img.close()
                 return False
@@ -456,32 +457,32 @@ class File(FileBase):
 
                 with open(metadata_file_path, 'w+') as metadata_file:
                     metadata_file.write("Key: {}\tValue: {}\n".format(tag, printable))
-        self.cur_file.add_log_details('metadata', 'exif')
+        self.add_log_details('metadata', 'exif')
         img.close()
         return True
 
     def _metadata_png(self, metadata_file_path):
         warnings.simplefilter('error', Image.DecompressionBombWarning)
         try:
-            img = Image.open(self.cur_file.src_path)
+            img = Image.open(self.src_path)
             for tag in sorted(img.info.keys()):
                 # These are long and obnoxious/binary
                 if tag not in ('icc_profile'):
                     with open(metadata_file_path, 'w+') as metadata_file:
                         metadata_file.write("Key: {}\tValue: {}\n".format(tag, img.info[tag]))
-            self.cur_file.add_log_details('metadata', 'png')
+            self.add_log_details('metadata', 'png')
             img.close()
         # Catch decompression bombs
         except Exception as e:
-            print("Caught exception processing metadata for {}".format(self.cur_file.src_path))
+            print("Caught exception processing metadata for {}".format(self.src_path))
             print(e)
-            self.cur_file.make_dangerous()
+            self.make_dangerous()
             self._safe_copy()
             return False
 
     def extract_metadata(self):
-        metadata_file_path = self.cur_file.create_metadata_file(".metadata.txt")
-        mt = self.cur_file.mimetype
+        metadata_file_path = self.create_metadata_file(".metadata.txt")
+        mt = self.mimetype
         metadata_processing_method = self.metadata_mimetype_methods.get(mt)
         if metadata_processing_method:
             # TODO: should we return metadata and write it here instead of in processing method?
@@ -491,17 +492,17 @@ class File(FileBase):
     # ##### Media - audio and video aren't converted ######
     def audio(self):
         """Processes an audio file."""
-        self.cur_file.log_string += 'Audio file'
+        self.log_string += 'Audio file'
         self._media_processing()
 
     def video(self):
         """Processes a video."""
-        self.cur_file.log_string += 'Video file'
+        self.log_string += 'Video file'
         self._media_processing()
 
     def _media_processing(self):
         """Generic way to process all media files."""
-        self.cur_file.add_log_details('processing_type', 'media')
+        self.add_log_details('processing_type', 'media')
         self._safe_copy()
 
     def image(self):
@@ -510,12 +511,12 @@ class File(FileBase):
         Extracts metadata if metadata is present. Creates a temporary
         directory, opens the using PIL.Image, saves it to the temporary
         directory, and copies it to the destination."""
-        if self.cur_file.has_metadata():
+        if self.has_metadata():
             self.extract_metadata()
 
         # FIXME make sure this works for png, gif, tiff
         # Create a temp directory
-        dst_dir, filename = os.path.split(self.cur_file.dst_path)
+        dst_dir, filename = os.path.split(self.dst_path)
         tmpdir = os.path.join(dst_dir, 'temp')
         tmppath = os.path.join(tmpdir, filename)
         self._safe_mkdir(tmpdir)
@@ -523,7 +524,7 @@ class File(FileBase):
         # Do our image conversions
         warnings.simplefilter('error', Image.DecompressionBombWarning)
         try:
-            imIn = Image.open(self.cur_file.src_path)
+            imIn = Image.open(self.src_path)
             imOut = Image.frombytes(imIn.mode, imIn.size, imIn.tobytes())
             imOut.save(tmppath)
 
@@ -533,13 +534,13 @@ class File(FileBase):
 
         # Catch decompression bombs
         except Exception as e:
-            print("Caught exception (possible decompression bomb?) while translating file {}.".format(self.cur_file.src_path))
+            print("Caught exception (possible decompression bomb?) while translating file {}.".format(self.src_path))
             print(e)
-            self.cur_file.make_dangerous()
+            self.make_dangerous()
             self._safe_copy()
 
-        self.cur_file.log_string += 'Image file'
-        self.cur_file.add_log_details('processing_type', 'image')
+        self.log_string += 'Image file'
+        self.add_log_details('processing_type', 'image')
 
 
 class KittenGroomerFileCheck(KittenGroomerBase):
@@ -557,22 +558,20 @@ class KittenGroomerFileCheck(KittenGroomerBase):
                            self.cur_file.main_type,
                            self.cur_file.sub_type)
         self.cur_file.check()
-
-        if not self.cur_file.is_dangerous():
-            self.mime_processing_options.get(self.cur_file.main_type, self.unknown)()
+        if self.cur_file.is_archive:
+            # Handle archive
+            pass
         else:
+            # TODO: Check if should be copied, maybe have an attribute for this?
             self._safe_copy()
-        if not self.cur_file.is_recursive:
             self._write_log()
 
     def process_dir(self, src_dir, dst_dir):
         """Main function coordinating file processing."""
         if self.recursive_archive_depth > 0:
             self._write_log()
-
         if self.recursive_archive_depth >= self.max_recursive_depth:
             self._handle_archivebomb(src_dir)
-
         for srcpath in self.list_all_files(src_dir):
             dstpath = srcpath.replace(src_dir, dst_dir)
             relative_path = srcpath.replace(src_dir + '/', '')
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 9f7cced..78d9dcd 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -42,7 +42,7 @@ class FileBase(object):
         """Initialized with the source path and expected destination path."""
         self.src_path = src_path
         self.dst_path = dst_path
-        # TODO: rename this. file_information? file_data? file_metadata? file_log?
+        # TODO: rename this to file_properties
         self.log_details = {'filepath': self.src_path}
         self.log_string = ''
         self.extension = self._determine_extension()

From 781d0a76af061d0dc5b5863f45a02b3d64bc9a98 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 1 Mar 2017 15:24:48 -0500
Subject: [PATCH 19/36] First working version with methods in File object

- All tests now passing with file handling methods on File object
instead of Groomer object.
- Logging functionality still isn't finished.
---
 bin/filecheck.py         | 193 +++++++++++++++++++--------------------
 kittengroomer/helpers.py |   3 +-
 2 files changed, 98 insertions(+), 98 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index bda25dd..6afbd60 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -86,15 +86,9 @@ class File(FileBase):
     def __init__(self, src_path, dst_path, logger):
         super(File, self).__init__(src_path, dst_path, logger)
         self.is_recursive = False
-        self._check_dangerous()
-        if self.is_dangerous():
-            return
-
         self.log_details.update({'maintype': self.main_type,
                                  'subtype': self.sub_type,
                                  'extension': self.extension})
-        self._check_extension()
-        self._check_mime()
 
         subtypes_apps = [
             (Config.mimes_office, self._winoffice),
@@ -129,8 +123,7 @@ class File(FileBase):
         }
 
     def _check_dangerous(self):
-        if not self.has_mimetype():
-            # No mimetype, should not happen.
+        if not self.has_mimetype():  # No mimetype, should not happen.
             self.make_dangerous()
         if not self.has_extension():
             self.make_dangerous()
@@ -147,7 +140,8 @@ class File(FileBase):
         if self.extension in Config.override_ext:
             expected_mimetype = Config.override_ext[self.extension]
         else:
-            expected_mimetype, encoding = mimetypes.guess_type(self.src_path, strict=False)
+            expected_mimetype, encoding = mimetypes.guess_type(self.src_path,
+                                                               strict=False)
             if expected_mimetype in Config.aliases:
                 expected_mimetype = Config.aliases[expected_mimetype]
         is_known_extension = self.extension in mimetypes.types_map.keys()
@@ -155,7 +149,7 @@ class File(FileBase):
             self.log_details.update({'expected_mimetype': expected_mimetype})
             self.make_dangerous()
 
-    def _check_mime(self):
+    def _check_mimetype(self):
         """Takes the mimetype (as determined by libmagic) and determines
         whether the list of extensions that are normally associated with
         that extension contains the file's actual extension."""
@@ -163,13 +157,17 @@ class File(FileBase):
             mimetype = Config.aliases[self.mimetype]
         else:
             mimetype = self.mimetype
-        expected_extensions = mimetypes.guess_all_extensions(mimetype, strict=False)
+        expected_extensions = mimetypes.guess_all_extensions(mimetype,
+                                                             strict=False)
         if expected_extensions:
-            if len(self.extension) > 0 and self.extension not in expected_extensions:
+            if self.has_extension() and self.extension not in expected_extensions:
                 self.log_details.update({'expected_extensions': expected_extensions})
                 self.make_dangerous()
 
     def check(self):
+        self._check_dangerous()
+        self._check_extension()
+        self._check_mimetype()
         if not self.is_dangerous():
             self.mime_processing_options.get(self.main_type, self.unknown)()
 
@@ -182,7 +180,7 @@ class File(FileBase):
                 dict_to_return[subtype] = method
         return dict_to_return
 
-    def _write_log(self):
+    def write_log(self):
         """Print the logs related to the current file being processed."""
         # TODO: move to helpers?
         tmp_log = self.logger.log.fields(**self.log_details)
@@ -209,6 +207,13 @@ class File(FileBase):
                 return
         return True
 
+    def _make_tempdir(self):
+        """Make a temporary directory."""
+        self.tempdir_path = self.dst_path + '_temp'
+        if not os.path.exists(self.tempdir_path):
+            os.makedirs(self.tempdir_path)
+        return self.tempdir_path
+
     #######################
     # ##### Discarded mimetypes, reason in the docstring ######
     def inode(self):
@@ -235,13 +240,11 @@ class File(FileBase):
         """Process a message file."""
         self.log_string += 'Message file'
         self.make_dangerous()
-        self._safe_copy()
 
     def model(self):
         """Process a model file."""
         self.log_string += 'Model file'
         self.make_dangerous()
-        self._safe_copy()
 
     # ##### Files that will be converted ######
     def text(self):
@@ -251,7 +254,6 @@ class File(FileBase):
                 self.log_string += 'Rich Text file'
                 # TODO: need a way to convert it to plain text
                 self.force_ext('.txt')
-                self._safe_copy()
                 return
         for mt in Config.mimes_ooxml:
             if mt in self.sub_type:
@@ -260,7 +262,6 @@ class File(FileBase):
                 return
         self.log_string += 'Text file'
         self.force_ext('.txt')
-        self._safe_copy()
 
     def application(self):
         """Processes an application specific file according to its subtype."""
@@ -276,7 +277,6 @@ class File(FileBase):
         """Processes an executable file."""
         self.add_log_details('processing_type', 'executable')
         self.make_dangerous()
-        self._safe_copy()
 
     def _winoffice(self):
         """Processes a winoffice file using olefile/oletools."""
@@ -315,7 +315,6 @@ class File(FileBase):
                 elif i.id == 'flash' and i.value:
                     self.add_log_details('flash', True)
                     self.make_dangerous()
-        self._safe_copy()
 
     def _ooxml(self):
         """Processes an ooxml file."""
@@ -325,7 +324,6 @@ class File(FileBase):
         except Exception:
             # Invalid file
             self.make_dangerous()
-            self._safe_copy()
             return
         # There are probably other potentially malicious features:
         # fonts, custom props, custom XML
@@ -342,7 +340,6 @@ class File(FileBase):
         if len(doc.features.embedded_packages) > 0:
             self.add_log_details('embedded_pack', True)
             self.make_dangerous()
-        self._safe_copy()
 
     def _libreoffice(self):
         """Processes a libreoffice file."""
@@ -359,7 +356,6 @@ class File(FileBase):
                     fname.startswith('object') or fname.endswith('.bin'):
                 self.add_log_details('macro', True)
                 self.make_dangerous()
-        self._safe_copy()
 
     def _pdf(self):
         """Processes a PDF file."""
@@ -390,37 +386,15 @@ class File(FileBase):
         bombs."""
         self.add_log_details('processing_type', 'archive')
         self.is_recursive = True
-        self.log_string += 'Archive extracted, processing content.'
-        tmpdir = self.dst_path + '_temp'
-        self._safe_mkdir(tmpdir)
-        extract_command = '{} -p1 x "{}" -o"{}" -bd -aoa'.format(SEVENZ_PATH, self.src_path, tmpdir)
-        self._run_process(extract_command)
-        self.recursive_archive_depth += 1
-        self.logger.tree(tmpdir)
-        self.process_dir(tmpdir, self.dst_path)
-        self.recursive_archive_depth -= 1
-        self._safe_rmtree(tmpdir)
-
-    def _handle_archivebomb(self, src_dir):
-        self.make_dangerous()
-        self.add_log_details('Archive Bomb', True)
-        self.log_name.warning('ARCHIVE BOMB.')
-        self.log_name.warning('The content of the archive contains recursively other archives.')
-        self.log_name.warning('This is a bad sign so the archive is not extracted to the destination key.')
-        self._safe_rmtree(src_dir)
-        if src_dir.endswith('_temp'):
-            bomb_path = src_dir[:-len('_temp')]
-            self._safe_remove(bomb_path)
+        # self.log_string += 'Archive extracted, processing content.'
 
     def _unknown_app(self):
         """Processes an unknown file."""
         self.make_unknown()
-        self._safe_copy()
 
     def _binary_app(self):
         """Processses an unknown binary file."""
         self.make_binary()
-        self._safe_copy()
 
     #######################
     # Metadata extractors
@@ -431,12 +405,14 @@ class File(FileBase):
         try:
             tags = exifread.process_file(img, debug=True)
         except Exception as e:
+            # TODO: log instead of print
             print("Error while trying to grab full metadata for file {}; retrying for partial data.".format(self.src_path))
             print(e)
         if tags is None:
             try:
                 tags = exifread.process_file(img, debug=True)
             except Exception as e:
+                # TODO: log instead of print
                 print("Failed to get any metadata for file {}.".format(self.src_path))
                 print(e)
                 img.close()
@@ -450,10 +426,7 @@ class File(FileBase):
                 # Exifreader truncates data.
                 if len(printable) > 25 and printable.endswith(", ... ]"):
                     value = tags[tag].values
-                    if isinstance(value, str):
-                        printable = value
-                    else:
-                        printable = str(value)
+                    printable = str(value)
 
                 with open(metadata_file_path, 'w+') as metadata_file:
                     metadata_file.write("Key: {}\tValue: {}\n".format(tag, printable))
@@ -474,10 +447,10 @@ class File(FileBase):
             img.close()
         # Catch decompression bombs
         except Exception as e:
+            # TODO: log instead of print
             print("Caught exception processing metadata for {}".format(self.src_path))
             print(e)
             self.make_dangerous()
-            self._safe_copy()
             return False
 
     def extract_metadata(self):
@@ -503,42 +476,29 @@ class File(FileBase):
     def _media_processing(self):
         """Generic way to process all media files."""
         self.add_log_details('processing_type', 'media')
-        self._safe_copy()
 
     def image(self):
         """Processes an image.
 
-        Extracts metadata if metadata is present. Creates a temporary
-        directory, opens the using PIL.Image, saves it to the temporary
-        directory, and copies it to the destination."""
+        Extracts metadata to dest key if metadata is present. Creates a
+        temporary directory on dest key, opens the using PIL.Image,saves it to
+        the temporary directory, and copies it to the destination."""
+        # TODO: make sure this method works for png, gif, tiff
         if self.has_metadata():
             self.extract_metadata()
-
-        # FIXME make sure this works for png, gif, tiff
-        # Create a temp directory
-        dst_dir, filename = os.path.split(self.dst_path)
-        tmpdir = os.path.join(dst_dir, 'temp')
-        tmppath = os.path.join(tmpdir, filename)
-        self._safe_mkdir(tmpdir)
-
-        # Do our image conversions
+        tempdir_path = self._make_tempdir()
+        tempfile_path = os.path.join(tempdir_path, self.filename)
         warnings.simplefilter('error', Image.DecompressionBombWarning)
-        try:
-            imIn = Image.open(self.src_path)
-            imOut = Image.frombytes(imIn.mode, imIn.size, imIn.tobytes())
-            imOut.save(tmppath)
-
-            # Copy the file back out and cleanup
-            self._safe_copy(tmppath)
-            self._safe_rmtree(tmpdir)
-
-        # Catch decompression bombs
-        except Exception as e:
+        try:  # Do image conversions
+            img_in = Image.open(self.src_path)
+            img_out = Image.frombytes(img_in.mode, img_in.size, img_in.tobytes())
+            img_out.save(tempfile_path)
+            self.src_path = tempfile_path
+        except Exception as e:  # Catch decompression bombs
+            # TODO: change this from printing to logging
             print("Caught exception (possible decompression bomb?) while translating file {}.".format(self.src_path))
             print(e)
             self.make_dangerous()
-            self._safe_copy()
-
         self.log_string += 'Image file'
         self.add_log_details('processing_type', 'image')
 
@@ -549,33 +509,72 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         super(KittenGroomerFileCheck, self).__init__(root_src, root_dst, debug)
         self.recursive_archive_depth = 0
         self.max_recursive_depth = max_recursive_depth
-        self.log_name = self.logger.log
-
-    def process_file(self, srcpath, dstpath, relative_path):
-        self.cur_file = File(srcpath, dstpath, self.logger)
-        self.log_name.info('Processing {} ({}/{})',
-                           relative_path,
-                           self.cur_file.main_type,
-                           self.cur_file.sub_type)
-        self.cur_file.check()
-        if self.cur_file.is_archive:
-            # Handle archive
-            pass
-        else:
-            # TODO: Check if should be copied, maybe have an attribute for this?
-            self._safe_copy()
-            self._write_log()
 
     def process_dir(self, src_dir, dst_dir):
         """Main function coordinating file processing."""
-        if self.recursive_archive_depth > 0:
-            self._write_log()
-        if self.recursive_archive_depth >= self.max_recursive_depth:
-            self._handle_archivebomb(src_dir)
+        # TODO: Think we want to move write_log elsewhere:
+        # if self.recursive_archive_depth > 0:
+        #     self.write_log()
+        # TODO: Can we clean up the way we handle relative_path?
         for srcpath in self.list_all_files(src_dir):
             dstpath = srcpath.replace(src_dir, dst_dir)
             relative_path = srcpath.replace(src_dir + '/', '')
-            self.process_file(srcpath, dstpath, relative_path)
+            self.cur_file = File(srcpath, dstpath, self.logger)
+            # TODO: move this logging code elsewhere
+            self.logger.log.info('Processing {} ({}/{})',
+                                 relative_path,
+                                 self.cur_file.main_type,
+                                 self.cur_file.sub_type)
+            self.process_file(self.cur_file)
+
+    def process_file(self, file):
+        file.check()
+        if file.is_recursive:
+            self.process_archive(file)
+        else:
+            # TODO: Check if should be copied, make an attribute for should be copied True/False
+            self._safe_copy()
+        file.write_log()
+        if hasattr(file, "tempdir_path"):
+            self._safe_rmtree(file.tempdir_path)
+
+    def process_archive(self, file):
+        """Unpacks an archive using 7zip and processes contents.
+
+        Should be given a Kittengroomer file object whose src_path points
+        to an archive."""
+        self.recursive_archive_depth += 1
+        # Check for archivebomb
+        if self.recursive_archive_depth >= self.max_recursive_depth:
+            self._handle_archivebomb(file)
+        else:
+            tempdir_path = file._make_tempdir()
+            # Unpack the archive
+            base_command = '{} -p1 x "{}" -o"{}" -bd -aoa'
+            extract_command = base_command.format(SEVENZ_PATH, file.src_path, tempdir_path)
+            file._run_process(extract_command)
+            # Add it to the tree
+            self.logger.tree(tempdir_path)
+            # List all files, process them
+            self.process_dir(tempdir_path, file.dst_path)
+            # Clean up
+            self._safe_rmtree(tempdir_path)
+        self.recursive_archive_depth -= 1
+
+
+    def _handle_archivebomb(self, file):
+        file.make_dangerous()
+        file.add_log_details('Archive Bomb', True)
+        self.logger.log.warning('ARCHIVE BOMB.')
+        self.logger.log.warning('The content of the archive contains recursively other archives.')
+        self.logger.log.warning('This is a bad sign so the archive is not extracted to the destination key.')
+        # TODO: are we sure we want to delete the archive on the source key? Commenting out for now
+        # self._safe_rmtree(file.src_dir)
+        # What is the goal of this code:
+        # if file.src_dir.endswith('_temp'):
+        #     # TODO: change the way bomb_path is constructed and the way we check for tempdir
+        #     bomb_path = file.src_dir[:-len('_temp')]
+        #     self._safe_remove(bomb_path)
 
     def run(self):
         self.process_dir(self.src_root_dir, self.dst_root_dir)
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 78d9dcd..2efde64 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -42,12 +42,13 @@ class FileBase(object):
         """Initialized with the source path and expected destination path."""
         self.src_path = src_path
         self.dst_path = dst_path
-        # TODO: rename this to file_properties
+        # TODO: rename this to file_properties (and change in other groomers)
         self.log_details = {'filepath': self.src_path}
         self.log_string = ''
         self.extension = self._determine_extension()
         self._determine_mimetype()
         self.logger = logger
+        self.filename = os.path.basename(self.src_path)
 
     def _determine_extension(self):
         _, ext = os.path.splitext(self.src_path)

From 8d7dd1197f6d59160848bb6c7e38df3187e87aa0 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 1 Mar 2017 17:43:43 -0500
Subject: [PATCH 20/36] Move run_process back to Groomer object

---
 .travis.yml      |  4 ++--
 bin/filecheck.py | 60 ++++++++++++++++++++++--------------------------
 2 files changed, 29 insertions(+), 35 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index b778bf8..786eeab 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -66,8 +66,8 @@ install:
     - rm fraunhoferlibrary.zip
     - 7z x -p42 42.zip
     # Some random samples
-    - wget http://www.sample-videos.com/audio/mp3/india-national-anthem.mp3
-    - wget http://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4
+    # - wget http://www.sample-videos.com/audio/mp3/india-national-anthem.mp3
+    # - wget http://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4
     - wget http://thewalter.net/stef/software/rtfx/sample.rtf
     - popd
 
diff --git a/bin/filecheck.py b/bin/filecheck.py
index 6afbd60..715aa44 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -182,7 +182,7 @@ class File(FileBase):
 
     def write_log(self):
         """Print the logs related to the current file being processed."""
-        # TODO: move to helpers?
+        # TODO: move to helpers.py?
         tmp_log = self.logger.log.fields(**self.log_details)
         if self.is_dangerous():
             tmp_log.warning(self.log_string)
@@ -191,23 +191,13 @@ class File(FileBase):
         else:
             tmp_log.debug(self.log_string)
 
-    # Make this an @property
+    # TODO: Make this an @property
     def has_metadata(self):
         if self.mimetype in Config.mimes_metadata:
             return True
         return False
 
-    def _run_process(self, command_string, timeout=None):
-        """Run command_string in a subprocess, wait until it finishes."""
-        args = shlex.split(command_string)
-        with open(self.logger.log_debug_err, 'ab') as stderr, open(self.logger.log_debug_out, 'ab') as stdout:
-            try:
-                subprocess.check_call(args, stdout=stdout, stderr=stderr, timeout=timeout)
-            except (subprocess.TimeoutExpired, subprocess.CalledProcessError):
-                return
-        return True
-
-    def _make_tempdir(self):
+    def make_tempdir(self):
         """Make a temporary directory."""
         self.tempdir_path = self.dst_path + '_temp'
         if not os.path.exists(self.tempdir_path):
@@ -348,6 +338,7 @@ class File(FileBase):
         try:
             lodoc = zipfile.ZipFile(self.src_path, 'r')
         except:
+            # TODO: are there specific exceptions we should catch here? Or is anything ok
             self.add_log_details('invalid', True)
             self.make_dangerous()
         for f in lodoc.infolist():
@@ -362,7 +353,7 @@ class File(FileBase):
         self.add_log_details('processing_type', 'pdf')
         xmlDoc = PDFiD(self.src_path)
         oPDFiD = cPDFiD(xmlDoc, True)
-        # TODO: other keywords?
+        # TODO: are there other characteristics which should be dangerous?
         if oPDFiD.encrypt.count > 0:
             self.add_log_details('encrypted', True)
             self.make_dangerous()
@@ -399,6 +390,7 @@ class File(FileBase):
     #######################
     # Metadata extractors
     def _metadata_exif(self, metadata_file_path):
+        # TODO: this method is kind of long, can we shorten it?
         img = open(self.src_path, 'rb')
         tags = None
 
@@ -486,7 +478,7 @@ class File(FileBase):
         # TODO: make sure this method works for png, gif, tiff
         if self.has_metadata():
             self.extract_metadata()
-        tempdir_path = self._make_tempdir()
+        tempdir_path = self.make_tempdir()
         tempfile_path = os.path.join(tempdir_path, self.filename)
         warnings.simplefilter('error', Image.DecompressionBombWarning)
         try:  # Do image conversions
@@ -495,6 +487,7 @@ class File(FileBase):
             img_out.save(tempfile_path)
             self.src_path = tempfile_path
         except Exception as e:  # Catch decompression bombs
+            # TODO: change this from all Exceptions to specific DecompressionBombWarning
             # TODO: change this from printing to logging
             print("Caught exception (possible decompression bomb?) while translating file {}.".format(self.src_path))
             print(e)
@@ -512,7 +505,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
 
     def process_dir(self, src_dir, dst_dir):
         """Main function coordinating file processing."""
-        # TODO: Think we want to move write_log elsewhere:
+        # TODO: we probably want to move this write_log elsewhere:
         # if self.recursive_archive_depth > 0:
         #     self.write_log()
         # TODO: Can we clean up the way we handle relative_path?
@@ -532,7 +525,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         if file.is_recursive:
             self.process_archive(file)
         else:
-            # TODO: Check if should be copied, make an attribute for should be copied True/False
+            # TODO: Make a File attribute for should be copied True/False and check it
             self._safe_copy()
         file.write_log()
         if hasattr(file, "tempdir_path"):
@@ -548,33 +541,34 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         if self.recursive_archive_depth >= self.max_recursive_depth:
             self._handle_archivebomb(file)
         else:
-            tempdir_path = file._make_tempdir()
-            # Unpack the archive
-            base_command = '{} -p1 x "{}" -o"{}" -bd -aoa'
-            extract_command = base_command.format(SEVENZ_PATH, file.src_path, tempdir_path)
-            file._run_process(extract_command)
-            # Add it to the tree
+            tempdir_path = file.make_tempdir()
+            command_str = '{} -p1 x "{}" -o"{}" -bd -aoa'
+            unpack_command = command_str.format(SEVENZ_PATH,
+                                                file.src_path, tempdir_path)
+            self._run_process(unpack_command)
+            # TODO: check that tree is working correctly here
             self.logger.tree(tempdir_path)
-            # List all files, process them
             self.process_dir(tempdir_path, file.dst_path)
-            # Clean up
             self._safe_rmtree(tempdir_path)
         self.recursive_archive_depth -= 1
 
-
     def _handle_archivebomb(self, file):
         file.make_dangerous()
         file.add_log_details('Archive Bomb', True)
         self.logger.log.warning('ARCHIVE BOMB.')
         self.logger.log.warning('The content of the archive contains recursively other archives.')
         self.logger.log.warning('This is a bad sign so the archive is not extracted to the destination key.')
-        # TODO: are we sure we want to delete the archive on the source key? Commenting out for now
-        # self._safe_rmtree(file.src_dir)
-        # What is the goal of this code:
-        # if file.src_dir.endswith('_temp'):
-        #     # TODO: change the way bomb_path is constructed and the way we check for tempdir
-        #     bomb_path = file.src_dir[:-len('_temp')]
-        #     self._safe_remove(bomb_path)
+        # TODO: delete whatever we want to delete that's already been copied to dest dir
+
+    def _run_process(self, command_string, timeout=None):
+        """Run command_string in a subprocess, wait until it finishes."""
+        args = shlex.split(command_string)
+        with open(self.logger.log_debug_err, 'ab') as stderr, open(self.logger.log_debug_out, 'ab') as stdout:
+            try:
+                subprocess.check_call(args, stdout=stdout, stderr=stderr, timeout=timeout)
+            except (subprocess.TimeoutExpired, subprocess.CalledProcessError):
+                return
+        return True
 
     def run(self):
         self.process_dir(self.src_root_dir, self.dst_root_dir)

From 9832101c85bda9e96330b768c16d9ef00331b562 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 27 Feb 2017 10:58:10 -0600
Subject: [PATCH 21/36] Identify TODOs that are log related

---
 bin/filecheck.py         | 20 ++++++++++----------
 kittengroomer/helpers.py | 14 +++++++-------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 715aa44..ccf0b0e 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -182,7 +182,7 @@ class File(FileBase):
 
     def write_log(self):
         """Print the logs related to the current file being processed."""
-        # TODO: move to helpers.py?
+        # LOG: move to helpers.py?
         tmp_log = self.logger.log.fields(**self.log_details)
         if self.is_dangerous():
             tmp_log.warning(self.log_string)
@@ -191,7 +191,7 @@ class File(FileBase):
         else:
             tmp_log.debug(self.log_string)
 
-    # TODO: Make this an @property
+    @property
     def has_metadata(self):
         if self.mimetype in Config.mimes_metadata:
             return True
@@ -397,14 +397,14 @@ class File(FileBase):
         try:
             tags = exifread.process_file(img, debug=True)
         except Exception as e:
-            # TODO: log instead of print
+            # LOG: log instead of print
             print("Error while trying to grab full metadata for file {}; retrying for partial data.".format(self.src_path))
             print(e)
         if tags is None:
             try:
                 tags = exifread.process_file(img, debug=True)
             except Exception as e:
-                # TODO: log instead of print
+                # LOG: log instead of print
                 print("Failed to get any metadata for file {}.".format(self.src_path))
                 print(e)
                 img.close()
@@ -439,7 +439,7 @@ class File(FileBase):
             img.close()
         # Catch decompression bombs
         except Exception as e:
-            # TODO: log instead of print
+            # LOG: log instead of print
             print("Caught exception processing metadata for {}".format(self.src_path))
             print(e)
             self.make_dangerous()
@@ -476,7 +476,7 @@ class File(FileBase):
         temporary directory on dest key, opens the using PIL.Image,saves it to
         the temporary directory, and copies it to the destination."""
         # TODO: make sure this method works for png, gif, tiff
-        if self.has_metadata():
+        if self.has_metadata:
             self.extract_metadata()
         tempdir_path = self.make_tempdir()
         tempfile_path = os.path.join(tempdir_path, self.filename)
@@ -488,7 +488,7 @@ class File(FileBase):
             self.src_path = tempfile_path
         except Exception as e:  # Catch decompression bombs
             # TODO: change this from all Exceptions to specific DecompressionBombWarning
-            # TODO: change this from printing to logging
+            # LOG: change this from printing to logging
             print("Caught exception (possible decompression bomb?) while translating file {}.".format(self.src_path))
             print(e)
             self.make_dangerous()
@@ -505,7 +505,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
 
     def process_dir(self, src_dir, dst_dir):
         """Main function coordinating file processing."""
-        # TODO: we probably want to move this write_log elsewhere:
+        # LOG: we probably want to move this write_log elsewhere:
         # if self.recursive_archive_depth > 0:
         #     self.write_log()
         # TODO: Can we clean up the way we handle relative_path?
@@ -513,7 +513,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
             dstpath = srcpath.replace(src_dir, dst_dir)
             relative_path = srcpath.replace(src_dir + '/', '')
             self.cur_file = File(srcpath, dstpath, self.logger)
-            # TODO: move this logging code elsewhere
+            # LOG: move this logging code elsewhere
             self.logger.log.info('Processing {} ({}/{})',
                                  relative_path,
                                  self.cur_file.main_type,
@@ -546,7 +546,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
             unpack_command = command_str.format(SEVENZ_PATH,
                                                 file.src_path, tempdir_path)
             self._run_process(unpack_command)
-            # TODO: check that tree is working correctly here
+            # LOG: check that tree is working correctly here
             self.logger.tree(tempdir_path)
             self.process_dir(tempdir_path, file.dst_path)
             self._safe_rmtree(tempdir_path)
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 2efde64..a2c17a2 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -173,12 +173,12 @@ class FileBase(object):
                 self.metadata_file_path = self.dst_path + ext
                 return self.metadata_file_path
         except KittenGroomerError as e:
-            # TODO: Write to log file
+            # LOG: Write to log file
             return False
 
 
-class GroomerLog(object):
-    """Groomer logging object"""
+class GroomerLogger(object):
+    """Groomer logging interface"""
 
     def __init__(self, root_dir, debug=False):
         self.log_dir_path = os.path.join(root_dir, 'logs')
@@ -223,8 +223,8 @@ class GroomerLog(object):
                 s.update(buf)
         return s.hexdigest()
 
-    def add_file(self):
-        # File object will add itself
+    def add_file(self, file):
+        # File object will add itself?
         # return a sublog for the file
         pass
 
@@ -241,7 +241,7 @@ class KittenGroomerBase(object):
         self.dst_root_dir = root_dst
         self.debug = debug
         self.cur_file = None
-        self.logger = GroomerLog(self.dst_root_dir, debug)
+        self.logger = GroomerLogger(self.dst_root_dir, debug)
 
     # ##### Helpers #####
     def _safe_rmtree(self, directory):
@@ -271,7 +271,7 @@ class KittenGroomerBase(object):
             shutil.copy(src, dst)
             return True
         except Exception as e:
-            # TODO: Logfile
+            # LOG: Logfile
             print(e)
             return False
 

From b6c01db1fb004f8820f59d199f65c47aa0db4897 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Fri, 3 Mar 2017 15:46:37 -0500
Subject: [PATCH 22/36] Split mimetype methods

- Instead of one large function that mutates FileBase properties,
mimetype and main/subtype are determined by two separate methods
that return mimetypes.
- The API is not changed.
- Absence of mimetype is now None instead of an empty string.
---
 kittengroomer/helpers.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index a2c17a2..9ba2b5e 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -27,7 +27,6 @@ class KittenGroomerError(Exception):
 
 class ImplementationRequired(KittenGroomerError):
     """Implementation required error."""
-
     pass
 
 
@@ -46,7 +45,9 @@ class FileBase(object):
         self.log_details = {'filepath': self.src_path}
         self.log_string = ''
         self.extension = self._determine_extension()
-        self._determine_mimetype()
+        self.mimetype = self._determine_mimetype()
+        if self.mimetype:
+            self.main_type, self.sub_type = self._split_subtypes(self.mimetype)
         self.logger = logger
         self.filename = os.path.basename(self.src_path)
 
@@ -57,24 +58,27 @@ class FileBase(object):
     def _determine_mimetype(self):
         if os.path.islink(self.src_path):
             # magic will throw an IOError on a broken symlink
-            self.mimetype = 'inode/symlink'
+            mimetype = 'inode/symlink'
         else:
             try:
                 mt = magic.from_file(self.src_path, mime=True)
                 # Note: magic will always return something, even if it's just 'data'
             except UnicodeEncodeError as e:
                 # FIXME: The encoding of the file is broken (possibly UTF-16)
-                mt = ''
+                mt = None
                 self.log_details.update({'UnicodeError': e})
             try:
-                self.mimetype = mt.decode("utf-8")
+                mimetype = mt.decode("utf-8")
             except:
-                self.mimetype = mt
-        if self.mimetype and '/' in self.mimetype:
-            self.main_type, self.sub_type = self.mimetype.split('/')
+                mimetype = mt
+        return mimetype
+
+    def _split_subtypes(self, mimetype):
+        if '/' in mimetype:
+            main_type, sub_type = mimetype.split('/')
         else:
-            self.main_type = ''
-            self.sub_type = ''
+            main_type, sub_type = None, None
+        return main_type, sub_type
 
     def has_mimetype(self):
         """
@@ -243,7 +247,6 @@ class KittenGroomerBase(object):
         self.cur_file = None
         self.logger = GroomerLogger(self.dst_root_dir, debug)
 
-    # ##### Helpers #####
     def _safe_rmtree(self, directory):
         """Remove a directory tree if it exists."""
         if os.path.exists(directory):

From 1c58a7347e92b9a5378bed44ecf357eafb83b94f Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 6 Mar 2017 14:55:33 -0500
Subject: [PATCH 23/36] If no extentions FileBase.ext is now None

---
 kittengroomer/helpers.py    | 5 ++++-
 tests/test_kittengroomer.py | 1 -
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 9ba2b5e..3e270dc 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -53,7 +53,10 @@ class FileBase(object):
 
     def _determine_extension(self):
         _, ext = os.path.splitext(self.src_path)
-        return ext.lower()
+        ext = ext.lower()
+        if ext == '':
+            ext = None
+        return ext
 
     def _determine_mimetype(self):
         if os.path.islink(self.src_path):
diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py
index 8339e84..bb5ab5d 100644
--- a/tests/test_kittengroomer.py
+++ b/tests/test_kittengroomer.py
@@ -130,7 +130,6 @@ class TestFileBase:
     def test_has_extension(self, temp_file, temp_file_no_ext):
         assert temp_file.has_extension() is True
         assert temp_file_no_ext.has_extension() is False
-        assert temp_file_no_ext.log_details.get('no_extension') is True
 
     def test_add_log_details(self, generic_conf_file):
         generic_conf_file.add_log_details('test', True)

From 12d5624b4dcf37c94847582e923493b7cca6488e Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Mon, 6 Mar 2017 15:02:29 -0500
Subject: [PATCH 24/36] Change FileBase.log_details to Filebase._file_props

* _file_props is a dict that will hold all information about the file
* Updated filecheck.py to reflect this
* Potentially will change contents of file_props to being attributes on the
file in the future. This change would be easy since all access to _file_props
is now via set_property and get_property methods.
* Add filename to _file_props
---
 bin/filecheck.py            | 75 +++++++++++++++----------------
 kittengroomer/__init__.py   |  2 +-
 kittengroomer/helpers.py    | 88 +++++++++++++++++++++----------------
 tests/test_kittengroomer.py | 67 ++++++++++++++--------------
 4 files changed, 121 insertions(+), 111 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index ccf0b0e..a49297d 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -86,9 +86,6 @@ class File(FileBase):
     def __init__(self, src_path, dst_path, logger):
         super(File, self).__init__(src_path, dst_path, logger)
         self.is_recursive = False
-        self.log_details.update({'maintype': self.main_type,
-                                 'subtype': self.sub_type,
-                                 'extension': self.extension})
 
         subtypes_apps = [
             (Config.mimes_office, self._winoffice),
@@ -128,7 +125,7 @@ class File(FileBase):
         if not self.has_extension():
             self.make_dangerous()
         if self.extension in Config.malicious_exts:
-            self.log_details.update({'malicious_extension': self.extension})
+            self.set_property('malicious_extension', self.extension)
             self.make_dangerous()
 
     def _check_extension(self):
@@ -146,7 +143,7 @@ class File(FileBase):
                 expected_mimetype = Config.aliases[expected_mimetype]
         is_known_extension = self.extension in mimetypes.types_map.keys()
         if is_known_extension and expected_mimetype != self.mimetype:
-            self.log_details.update({'expected_mimetype': expected_mimetype})
+            self.set_property('expected_mimetype', expected_mimetype)
             self.make_dangerous()
 
     def _check_mimetype(self):
@@ -161,7 +158,7 @@ class File(FileBase):
                                                              strict=False)
         if expected_extensions:
             if self.has_extension() and self.extension not in expected_extensions:
-                self.log_details.update({'expected_extensions': expected_extensions})
+                self.set_property('expected_extensions', expected_extensions)
                 self.make_dangerous()
 
     def check(self):
@@ -182,11 +179,11 @@ class File(FileBase):
 
     def write_log(self):
         """Print the logs related to the current file being processed."""
-        # LOG: move to helpers.py?
-        tmp_log = self.logger.log.fields(**self.log_details)
+        # LOG: move to helpers.py and change
+        tmp_log = self.logger.log.fields(**self._file_props)
         if self.is_dangerous():
             tmp_log.warning(self.log_string)
-        elif self.log_details.get('unknown') or self.log_details.get('binary'):
+        elif self.get_property('unknown') or self.get_property('binary'):
             tmp_log.info(self.log_string)
         else:
             tmp_log.debug(self.log_string)
@@ -209,7 +206,7 @@ class File(FileBase):
     def inode(self):
         """Empty file or symlink."""
         if self.is_symlink():
-            self.log_string += 'Symlink to {}'.format(self.log_details['symlink'])
+            self.log_string += 'Symlink to {}'.format(self.get_property('symlink'))
         else:
             self.log_string += 'Inode file'
 
@@ -265,12 +262,12 @@ class File(FileBase):
 
     def _executables(self):
         """Processes an executable file."""
-        self.add_log_details('processing_type', 'executable')
+        self.set_property('processing_type', 'executable')
         self.make_dangerous()
 
     def _winoffice(self):
         """Processes a winoffice file using olefile/oletools."""
-        self.add_log_details('processing_type', 'WinOffice')
+        self.set_property('processing_type', 'WinOffice')
         # Try as if it is a valid document
         oid = oletools.oleid.OleID(self.src_path)
         if not olefile.isOleFile(self.src_path):
@@ -278,37 +275,37 @@ class File(FileBase):
             try:
                 ole = olefile.OleFileIO(self.src_path, raise_defects=olefile.DEFECT_INCORRECT)
             except:
-                self.add_log_details('not_parsable', True)
+                self.set_property('not_parsable', True)
                 self.make_dangerous()
             if ole.parsing_issues:
-                self.add_log_details('parsing_issues', True)
+                self.set_property('parsing_issues', True)
                 self.make_dangerous()
             else:
                 if ole.exists('macros/vba') or ole.exists('Macros') \
                         or ole.exists('_VBA_PROJECT_CUR') or ole.exists('VBA'):
-                    self.add_log_details('macro', True)
+                    self.set_property('macro', True)
                     self.make_dangerous()
         else:
             indicators = oid.check()
             # Encrypted ban be set by multiple checks on the script
             if oid.encrypted.value:
-                self.add_log_details('encrypted', True)
+                self.set_property('encrypted', True)
                 self.make_dangerous()
             if oid.macros.value or oid.ole.exists('macros/vba') or oid.ole.exists('Macros') \
                     or oid.ole.exists('_VBA_PROJECT_CUR') or oid.ole.exists('VBA'):
-                self.add_log_details('macro', True)
+                self.set_property('macro', True)
                 self.make_dangerous()
             for i in indicators:
                 if i.id == 'ObjectPool' and i.value:
                     # FIXME: Is it suspicious?
-                    self.add_log_details('objpool', True)
+                    self.set_property('objpool', True)
                 elif i.id == 'flash' and i.value:
-                    self.add_log_details('flash', True)
+                    self.set_property('flash', True)
                     self.make_dangerous()
 
     def _ooxml(self):
         """Processes an ooxml file."""
-        self.add_log_details('processing_type', 'ooxml')
+        self.set_property('processing_type', 'ooxml')
         try:
             doc = officedissector.doc.Document(self.src_path)
         except Exception:
@@ -318,56 +315,56 @@ class File(FileBase):
         # There are probably other potentially malicious features:
         # fonts, custom props, custom XML
         if doc.is_macro_enabled or len(doc.features.macros) > 0:
-            self.add_log_details('macro', True)
+            self.set_property('macro', True)
             self.make_dangerous()
         if len(doc.features.embedded_controls) > 0:
-            self.add_log_details('activex', True)
+            self.set_property('activex', True)
             self.make_dangerous()
         if len(doc.features.embedded_objects) > 0:
             # Exploited by CVE-2014-4114 (OLE)
-            self.add_log_details('embedded_obj', True)
+            self.set_property('embedded_obj', True)
             self.make_dangerous()
         if len(doc.features.embedded_packages) > 0:
-            self.add_log_details('embedded_pack', True)
+            self.set_property('embedded_pack', True)
             self.make_dangerous()
 
     def _libreoffice(self):
         """Processes a libreoffice file."""
-        self.add_log_details('processing_type', 'libreoffice')
+        self.set_property('processing_type', 'libreoffice')
         # As long as there ar no way to do a sanity check on the files => dangerous
         try:
             lodoc = zipfile.ZipFile(self.src_path, 'r')
         except:
             # TODO: are there specific exceptions we should catch here? Or is anything ok
-            self.add_log_details('invalid', True)
+            self.set_property('invalid', True)
             self.make_dangerous()
         for f in lodoc.infolist():
             fname = f.filename.lower()
             if fname.startswith('script') or fname.startswith('basic') or \
                     fname.startswith('object') or fname.endswith('.bin'):
-                self.add_log_details('macro', True)
+                self.set_property('macro', True)
                 self.make_dangerous()
 
     def _pdf(self):
         """Processes a PDF file."""
-        self.add_log_details('processing_type', 'pdf')
+        self.set_property('processing_type', 'pdf')
         xmlDoc = PDFiD(self.src_path)
         oPDFiD = cPDFiD(xmlDoc, True)
         # TODO: are there other characteristics which should be dangerous?
         if oPDFiD.encrypt.count > 0:
-            self.add_log_details('encrypted', True)
+            self.set_property('encrypted', True)
             self.make_dangerous()
         if oPDFiD.js.count > 0 or oPDFiD.javascript.count > 0:
-            self.add_log_details('javascript', True)
+            self.set_property('javascript', True)
             self.make_dangerous()
         if oPDFiD.aa.count > 0 or oPDFiD.openaction.count > 0:
-            self.add_log_details('openaction', True)
+            self.set_property('openaction', True)
             self.make_dangerous()
         if oPDFiD.richmedia.count > 0:
-            self.add_log_details('flash', True)
+            self.set_property('flash', True)
             self.make_dangerous()
         if oPDFiD.launch.count > 0:
-            self.add_log_details('launch', True)
+            self.set_property('launch', True)
             self.make_dangerous()
 
     def _archive(self):
@@ -375,7 +372,7 @@ class File(FileBase):
         temporary directory and self.process_dir is called on that directory.
         The recursive archive depth is increased to protect against archive
         bombs."""
-        self.add_log_details('processing_type', 'archive')
+        self.set_property('processing_type', 'archive')
         self.is_recursive = True
         # self.log_string += 'Archive extracted, processing content.'
 
@@ -422,7 +419,7 @@ class File(FileBase):
 
                 with open(metadata_file_path, 'w+') as metadata_file:
                     metadata_file.write("Key: {}\tValue: {}\n".format(tag, printable))
-        self.add_log_details('metadata', 'exif')
+        self.set_property('metadata', 'exif')
         img.close()
         return True
 
@@ -435,7 +432,7 @@ class File(FileBase):
                 if tag not in ('icc_profile'):
                     with open(metadata_file_path, 'w+') as metadata_file:
                         metadata_file.write("Key: {}\tValue: {}\n".format(tag, img.info[tag]))
-            self.add_log_details('metadata', 'png')
+            self.set_property('metadata', 'png')
             img.close()
         # Catch decompression bombs
         except Exception as e:
@@ -467,7 +464,7 @@ class File(FileBase):
 
     def _media_processing(self):
         """Generic way to process all media files."""
-        self.add_log_details('processing_type', 'media')
+        self.set_property('processing_type', 'media')
 
     def image(self):
         """Processes an image.
@@ -493,7 +490,7 @@ class File(FileBase):
             print(e)
             self.make_dangerous()
         self.log_string += 'Image file'
-        self.add_log_details('processing_type', 'image')
+        self.set_property('processing_type', 'image')
 
 
 class KittenGroomerFileCheck(KittenGroomerBase):
@@ -554,7 +551,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
 
     def _handle_archivebomb(self, file):
         file.make_dangerous()
-        file.add_log_details('Archive Bomb', True)
+        file.set_property('Archive Bomb', True)
         self.logger.log.warning('ARCHIVE BOMB.')
         self.logger.log.warning('The content of the archive contains recursively other archives.')
         self.logger.log.warning('This is a bad sign so the archive is not extracted to the destination key.')
diff --git a/kittengroomer/__init__.py b/kittengroomer/__init__.py
index 262988a..8428553 100644
--- a/kittengroomer/__init__.py
+++ b/kittengroomer/__init__.py
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-from .helpers import FileBase, KittenGroomerBase, GroomerLog, main
+from .helpers import FileBase, KittenGroomerBase, GroomerLogger, main
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 3e270dc..caaedff 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -41,15 +41,18 @@ class FileBase(object):
         """Initialized with the source path and expected destination path."""
         self.src_path = src_path
         self.dst_path = dst_path
-        # TODO: rename this to file_properties (and change in other groomers)
-        self.log_details = {'filepath': self.src_path}
+        self.filename = os.path.basename(self.src_path)
         self.log_string = ''
+        self.logger = logger
         self.extension = self._determine_extension()
         self.mimetype = self._determine_mimetype()
         if self.mimetype:
             self.main_type, self.sub_type = self._split_subtypes(self.mimetype)
-        self.logger = logger
-        self.filename = os.path.basename(self.src_path)
+        self._file_props = {'filepath': self.src_path,
+                            'filename': self.filename,
+                            'maintype': self.main_type,
+                            'subtype': self.sub_type,
+                            'extension': self.extension}
 
     def _determine_extension(self):
         _, ext = os.path.splitext(self.src_path)
@@ -68,8 +71,8 @@ class FileBase(object):
                 # Note: magic will always return something, even if it's just 'data'
             except UnicodeEncodeError as e:
                 # FIXME: The encoding of the file is broken (possibly UTF-16)
+                # LOG: log this error
                 mt = None
-                self.log_details.update({'UnicodeError': e})
             try:
                 mimetype = mt.decode("utf-8")
             except:
@@ -84,50 +87,50 @@ class FileBase(object):
         return main_type, sub_type
 
     def has_mimetype(self):
-        """
-        Returns True if file has a full mimetype, else False.
-
-        Returns False + updates log if self.main_type or self.sub_type
-        are not set.
-        """
+        """Returns True if file has a full mimetype, else False."""
         if not self.main_type or not self.sub_type:
-            self.log_details.update({'broken_mime': True})
+            # LOG: log this as an error
+            # self._file_props.update({'broken_mime': True})
             return False
-        return True
+        else:
+            return True
 
     def has_extension(self):
-        """
-        Returns True if self.extension is set, else False.
-
-        Returns False + updates self.log_details if self.extension is not set.
-        """
-        if self.extension == '':
-            self.log_details.update({'no_extension': True})
+        """Returns True if self.extension is set, else False."""
+        if self.extension is None:
+            # TODO: move this props updating to some other method
+            # self.set_property('no_extension', True)
             return False
-        return True
+        else:
+            return True
 
     def is_dangerous(self):
-        """Returns True if self.log_details contains 'dangerous'."""
-        return ('dangerous' in self.log_details)
+        """True if file has been marked 'dangerous' else False."""
+        return ('dangerous' in self._file_props)
 
     def is_unknown(self):
-        """Returns True if self.log_details contains 'unknown'."""
-        return ('unknown' in self.log_details)
+        """True if file has been marked 'unknown' else False."""
+        return ('unknown' in self._file_props)
 
     def is_binary(self):
-        """returns True if self.log_details contains 'binary'."""
-        return ('binary' in self.log_details)
+        """True if file has been marked 'binary' else False."""
+        return ('binary' in self._file_props)
 
     def is_symlink(self):
         """Returns True and updates log if file is a symlink."""
         if self.has_mimetype() and self.main_type == 'inode' and self.sub_type == 'symlink':
-            self.log_details.update({'symlink': os.readlink(self.src_path)})
+            # TODO: move this props updating somewhere else
+            # self.set_property('symlink', os.readlink(self.src_path))
             return True
-        return False
+        else:
+            return False
 
-    def add_log_details(self, key, value):
-        """Takes a key + a value and adds them to self.log_details."""
-        self.log_details[key] = value
+    def set_property(self, prop_string, value):
+        """Takes a property + a value and adds them to self._file_props."""
+        self._file_props[prop_string] = value
+
+    def get_property(self, file_prop):
+        return self._file_props.get(file_prop, None)
 
     def make_dangerous(self):
         """
@@ -138,7 +141,7 @@ class FileBase(object):
         """
         if self.is_dangerous():
             return
-        self.log_details['dangerous'] = True
+        self.set_property('dangerous', True)
         path, filename = os.path.split(self.dst_path)
         self.dst_path = os.path.join(path, 'DANGEROUS_{}_DANGEROUS'.format(filename))
 
@@ -146,7 +149,7 @@ class FileBase(object):
         """Marks a file as an unknown type and prepends UNKNOWN to filename."""
         if self.is_dangerous() or self.is_binary():
             return
-        self.log_details['unknown'] = True
+        self.set_property('unknown', True)
         path, filename = os.path.split(self.dst_path)
         self.dst_path = os.path.join(path, 'UNKNOWN_{}'.format(filename))
 
@@ -154,15 +157,17 @@ class FileBase(object):
         """Marks a file as a binary and appends .bin to filename."""
         if self.is_dangerous():
             return
-        self.log_details['binary'] = True
+        self.set_property('binary', True)
         path, filename = os.path.split(self.dst_path)
         self.dst_path = os.path.join(path, '{}.bin'.format(filename))
 
     def force_ext(self, ext):
-        """If dst_path does not end in ext, appends the ext and updates log."""
+        """If dst_path does not end in ext, changes it and edits _file_props."""
         if not self.dst_path.endswith(ext):
-            self.log_details['force_ext'] = True
+            self.set_property('force_ext', True)
             self.dst_path += ext
+        if not self._file_props['extension'] == ext:
+            self.set_property('extension', ext)
 
     def create_metadata_file(self, ext):
         """Create a separate file to hold this file's metadata."""
@@ -187,8 +192,9 @@ class FileBase(object):
 class GroomerLogger(object):
     """Groomer logging interface"""
 
-    def __init__(self, root_dir, debug=False):
-        self.log_dir_path = os.path.join(root_dir, 'logs')
+    def __init__(self, root_dir_path, debug=False):
+        self.root_dir = root_dir_path
+        self.log_dir_path = os.path.join(root_dir_path, 'logs')
         if os.path.exists(self.log_dir_path):
             shutil.rmtree(self.log_dir_path)
         os.makedirs(self.log_dir_path)
@@ -252,21 +258,25 @@ class KittenGroomerBase(object):
 
     def _safe_rmtree(self, directory):
         """Remove a directory tree if it exists."""
+        # TODO: should be a public method
         if os.path.exists(directory):
             shutil.rmtree(directory)
 
     def _safe_remove(self, filepath):
         """Remove a file if it exists."""
+        # TODO: should be a public method
         if os.path.exists(filepath):
             os.remove(filepath)
 
     def _safe_mkdir(self, directory):
         """Make a directory if it does not exist."""
+        # TODO: should be a public method
         if not os.path.exists(directory):
             os.makedirs(directory)
 
     def _safe_copy(self, src=None, dst=None):
         """Copy a file and create directory if needed."""
+        # TODO: should be a public method
         if src is None:
             src = self.cur_file.src_path
         if dst is None:
diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py
index bb5ab5d..4497728 100644
--- a/tests/test_kittengroomer.py
+++ b/tests/test_kittengroomer.py
@@ -5,7 +5,7 @@ import os
 
 import pytest
 
-from kittengroomer import FileBase, KittenGroomerBase, GroomerLog
+from kittengroomer import FileBase, KittenGroomerBase, GroomerLogger
 from kittengroomer.helpers import ImplementationRequired
 
 skip = pytest.mark.skip
@@ -95,13 +95,10 @@ class TestFileBase:
 
     def test_init(self, generic_conf_file):
         file = generic_conf_file
-        assert file.log_details
-        assert file.log_details['filepath'] == file.src_path
-        assert file.extension == '.conf'
-        copied_log = file.log_details.copy()
-        file.log_details = ''
-        # assert file.log_details == copied_log     # this fails for now, we need to make log_details undeletable
-        # we should probably check for more extensions here
+        assert file.get_property('filepath') == file.src_path
+        assert file.get_property('extension') == '.conf'
+        # TODO: should we make log_details undeletable?
+        # TODO: we should probably check for more extensions here
 
     def test_extension_uppercase(self, tmpdir):
         file_path = tmpdir.join('TEST.TXT')
@@ -129,13 +126,13 @@ class TestFileBase:
 
     def test_has_extension(self, temp_file, temp_file_no_ext):
         assert temp_file.has_extension() is True
+        print(temp_file_no_ext.extension)
         assert temp_file_no_ext.has_extension() is False
 
-    def test_add_log_details(self, generic_conf_file):
-        generic_conf_file.add_log_details('test', True)
-        assert generic_conf_file.log_details['test'] is True
-        with pytest.raises(KeyError):
-            assert generic_conf_file.log_details['wrong'] is False
+    def test_set_property(self, generic_conf_file):
+        generic_conf_file.set_property('test', True)
+        assert generic_conf_file.get_property('test') is True
+        assert generic_conf_file.get_property('wrong') is None
 
     def test_marked_dangerous(self, file_marked_all_parameterized):
         file_marked_all_parameterized.make_dangerous()
@@ -164,52 +161,54 @@ class TestFileBase:
         assert symlink.is_symlink() is True
 
     def test_generic_make_unknown(self, generic_conf_file):
-        assert generic_conf_file.log_details.get('unknown') is None
+        assert generic_conf_file.is_unknown() is False
         generic_conf_file.make_unknown()
-        assert generic_conf_file.log_details.get('unknown') is True
+        assert generic_conf_file.is_unknown()
         # given a FileBase object with no marking, should do the right things
 
     def test_marked_make_unknown(self, file_marked_all_parameterized):
         file = file_marked_all_parameterized
-        if file.log_details.get('unknown'):
+        if file.is_unknown():
             file.make_unknown()
-            assert file.log_details.get('unknown') is True
+            assert file.is_unknown()
         else:
-            assert file.log_details.get('unknown') is None
+            assert file.is_unknown() is False
             file.make_unknown()
-            assert file.log_details.get('unknown') is None
+            assert file.is_unknown() is False
         # given a FileBase object with an unrecognized marking, should ???
 
     def test_generic_make_binary(self, generic_conf_file):
-        assert generic_conf_file.log_details.get('binary') is None
+        assert generic_conf_file.is_binary() is False
         generic_conf_file.make_binary()
-        assert generic_conf_file.log_details.get('binary') is True
+        assert generic_conf_file.is_binary() is True
 
     def test_marked_make_binary(self, file_marked_all_parameterized):
         file = file_marked_all_parameterized
-        if file.log_details.get('dangerous'):
+        if file.is_dangerous():
             file.make_binary()
-            assert file.log_details.get('binary') is None
+            assert file.is_binary() is False
+            assert file.get_property('binary') is None
         else:
             file.make_binary()
-            assert file.log_details.get('binary') is True
+            assert file.is_binary()
+            assert file.get_property('binary') is True
 
     def test_force_ext_change(self, generic_conf_file):
         assert generic_conf_file.has_extension()
-        assert generic_conf_file.extension == '.conf'
+        assert generic_conf_file.get_property('extension') == '.conf'
         assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
         generic_conf_file.force_ext('.txt')
         assert os.path.splitext(generic_conf_file.dst_path)[1] == '.txt'
-        assert generic_conf_file.log_details.get('force_ext') is True
-        # should make a file's extension change
+        assert generic_conf_file.get_property('force_ext') is True
+        assert generic_conf_file.get_property('extension') == '.txt'
         # should be able to handle weird paths
 
     def test_force_ext_correct(self, generic_conf_file):
         assert generic_conf_file.has_extension()
-        assert generic_conf_file.extension == '.conf'
+        assert generic_conf_file.get_property('extension') == '.conf'
         generic_conf_file.force_ext('.conf')
         assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
-        assert generic_conf_file.log_details.get('force_ext') is None
+        assert generic_conf_file.get_property('force_ext') is None
         # shouldn't change a file's extension if it already is right
 
     def test_create_metadata_file(self, temp_file):
@@ -224,9 +223,13 @@ class TestFileBase:
 
 
 class TestLogger:
-    @xfail
-    def test_tree(self, tmpdir):
-        GroomerLog.tree(tmpdir)
+
+    @fixture
+    def generic_logger(self, tmpdir):
+        return GroomerLogger(tmpdir.strpath)
+
+    def test_tree(self, generic_logger):
+        generic_logger.tree(generic_logger.root_dir)
 
 
 class TestKittenGroomerBase:

From fc8923fddd10430c9706edc897ed7585c2b4db2c Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Tue, 7 Mar 2017 15:31:32 -0500
Subject: [PATCH 25/36] Change Groomer private methods to public

* Changed safe_rmtree, safe_copy, safe_remove, and safe_mkdir to public methods
* If something is being used in a subclass it probably shouldn't be a private
method
---
 bin/filecheck.py            |  6 +++---
 kittengroomer/helpers.py    | 14 +++++---------
 tests/test_kittengroomer.py |  2 +-
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index a49297d..a0c4f6f 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -523,10 +523,10 @@ class KittenGroomerFileCheck(KittenGroomerBase):
             self.process_archive(file)
         else:
             # TODO: Make a File attribute for should be copied True/False and check it
-            self._safe_copy()
+            self.safe_copy()
         file.write_log()
         if hasattr(file, "tempdir_path"):
-            self._safe_rmtree(file.tempdir_path)
+            self.safe_rmtree(file.tempdir_path)
 
     def process_archive(self, file):
         """Unpacks an archive using 7zip and processes contents.
@@ -546,7 +546,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
             # LOG: check that tree is working correctly here
             self.logger.tree(tempdir_path)
             self.process_dir(tempdir_path, file.dst_path)
-            self._safe_rmtree(tempdir_path)
+            self.safe_rmtree(tempdir_path)
         self.recursive_archive_depth -= 1
 
     def _handle_archivebomb(self, file):
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index caaedff..cf9e098 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -256,34 +256,30 @@ class KittenGroomerBase(object):
         self.cur_file = None
         self.logger = GroomerLogger(self.dst_root_dir, debug)
 
-    def _safe_rmtree(self, directory):
+    def safe_rmtree(self, directory):
         """Remove a directory tree if it exists."""
-        # TODO: should be a public method
         if os.path.exists(directory):
             shutil.rmtree(directory)
 
-    def _safe_remove(self, filepath):
+    def safe_remove(self, filepath):
         """Remove a file if it exists."""
-        # TODO: should be a public method
         if os.path.exists(filepath):
             os.remove(filepath)
 
-    def _safe_mkdir(self, directory):
+    def safe_mkdir(self, directory):
         """Make a directory if it does not exist."""
-        # TODO: should be a public method
         if not os.path.exists(directory):
             os.makedirs(directory)
 
-    def _safe_copy(self, src=None, dst=None):
+    def safe_copy(self, src=None, dst=None):
         """Copy a file and create directory if needed."""
-        # TODO: should be a public method
         if src is None:
             src = self.cur_file.src_path
         if dst is None:
             dst = self.cur_file.dst_path
         try:
             dst_path, filename = os.path.split(dst)
-            self._safe_mkdir(dst_path)
+            self.safe_mkdir(dst_path)
             shutil.copy(src, dst)
             return True
         except Exception as e:
diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py
index 4497728..5866aaf 100644
--- a/tests/test_kittengroomer.py
+++ b/tests/test_kittengroomer.py
@@ -264,7 +264,7 @@ class TestKittenGroomerBase:
         filedest = testdir.join('test.txt')
         simple_groomer = KittenGroomerBase(tmpdir.strpath, testdir.strpath)
         simple_groomer.cur_file = FileBase(file.strpath, filedest.strpath)
-        assert simple_groomer._safe_copy() is True
+        assert simple_groomer.safe_copy() is True
         #check that it handles weird file path inputs
 
     def test_list_all_files(self, tmpdir):

From 0038d3ef66a3ea9634848e86de38a52c36d274e1 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 8 Mar 2017 21:30:06 -0500
Subject: [PATCH 26/36] Switch to using file properties

* make_dangerous now takes a description string
* add_file_string takes strings describing the file
---
 bin/filecheck.py            | 143 +++++++++++++++---------------------
 kittengroomer/helpers.py    |  81 +++++++++++++-------
 tests/test_filecheck.py     |   3 +
 tests/test_kittengroomer.py |  40 ++++------
 4 files changed, 129 insertions(+), 138 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index a0c4f6f..47f9c02 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -120,13 +120,12 @@ class File(FileBase):
         }
 
     def _check_dangerous(self):
-        if not self.has_mimetype():  # No mimetype, should not happen.
-            self.make_dangerous()
+        if not self.has_mimetype():
+            self.make_dangerous('no mimetype')
         if not self.has_extension():
-            self.make_dangerous()
+            self.make_dangerous('no extension')
         if self.extension in Config.malicious_exts:
-            self.set_property('malicious_extension', self.extension)
-            self.make_dangerous()
+            self.make_dangerous('malicious_extension')
 
     def _check_extension(self):
         """Guesses the file's mimetype based on its extension. If the file's
@@ -143,8 +142,8 @@ class File(FileBase):
                 expected_mimetype = Config.aliases[expected_mimetype]
         is_known_extension = self.extension in mimetypes.types_map.keys()
         if is_known_extension and expected_mimetype != self.mimetype:
-            self.set_property('expected_mimetype', expected_mimetype)
-            self.make_dangerous()
+            # LOG: improve this string
+            self.make_dangerous('expected_mimetyped')
 
     def _check_mimetype(self):
         """Takes the mimetype (as determined by libmagic) and determines
@@ -158,8 +157,8 @@ class File(FileBase):
                                                              strict=False)
         if expected_extensions:
             if self.has_extension() and self.extension not in expected_extensions:
-                self.set_property('expected_extensions', expected_extensions)
-                self.make_dangerous()
+                # LOG: improve this string
+                self.make_dangerous('expected extensions')
 
     def check(self):
         self._check_dangerous()
@@ -179,11 +178,11 @@ class File(FileBase):
 
     def write_log(self):
         """Print the logs related to the current file being processed."""
-        # LOG: move to helpers.py and change
+        # LOG: move to helpers.py GroomerLogger and modify
         tmp_log = self.logger.log.fields(**self._file_props)
         if self.is_dangerous():
             tmp_log.warning(self.log_string)
-        elif self.get_property('unknown') or self.get_property('binary'):
+        elif self.is_unknown() or self.is_binary():
             tmp_log.info(self.log_string)
         else:
             tmp_log.debug(self.log_string)
@@ -206,67 +205,71 @@ class File(FileBase):
     def inode(self):
         """Empty file or symlink."""
         if self.is_symlink():
-            self.log_string += 'Symlink to {}'.format(self.get_property('symlink'))
+            symlink_path = self.get_property('symlink')
+            self.add_file_string('Symlink to {}'.format(symlink_path))
         else:
-            self.log_string += 'Inode file'
+            self.add_file_string('Inode file')
 
     def unknown(self):
         """Main type should never be unknown."""
-        self.log_string += 'Unknown file'
+        self.add_file_string('Unknown file')
 
     def example(self):
         """Used in examples, should never be returned by libmagic."""
-        self.log_string += 'Example file'
+        self.add_file_string('Example file')
 
     def multipart(self):
         """Used in web apps, should never be returned by libmagic"""
-        self.log_string += 'Multipart file'
+        self.add_file_string('Multipart file')
 
     # ##### Treated as malicious, no reason to have it on a USB key ######
     def message(self):
         """Process a message file."""
-        self.log_string += 'Message file'
-        self.make_dangerous()
+        self.add_file_string('Message file')
+        self.make_dangerous('Message file')
 
     def model(self):
         """Process a model file."""
-        self.log_string += 'Model file'
-        self.make_dangerous()
+        self.add_file_string('Model file')
+        self.make_dangerous('Model file')
 
     # ##### Files that will be converted ######
     def text(self):
         """Process an rtf, ooxml, or plaintext file."""
         for mt in Config.mimes_rtf:
             if mt in self.sub_type:
-                self.log_string += 'Rich Text file'
+                self.add_file_string('Rich Text file')
                 # TODO: need a way to convert it to plain text
                 self.force_ext('.txt')
                 return
         for mt in Config.mimes_ooxml:
             if mt in self.sub_type:
-                self.log_string += 'OOXML File'
+                self.add_file_string('OOXML File')
                 self._ooxml()
                 return
-        self.log_string += 'Text file'
+        self.add_file_string('Text file')
         self.force_ext('.txt')
 
     def application(self):
         """Processes an application specific file according to its subtype."""
         for subtype, method in self.app_subtype_methods.items():
             if subtype in self.sub_type:
+                # TODO: should this return a value?
                 method()
-                self.log_string += 'Application file'
+                self.add_file_string('Application file')
                 return
-        self.log_string += 'Unknown Application file'
+        self.add_file_string('Unknown Application file')
         self._unknown_app()
 
     def _executables(self):
         """Processes an executable file."""
+        # LOG: change this property
         self.set_property('processing_type', 'executable')
-        self.make_dangerous()
+        self.make_dangerous('executable')
 
     def _winoffice(self):
         """Processes a winoffice file using olefile/oletools."""
+        # LOG: change this property
         self.set_property('processing_type', 'WinOffice')
         # Try as if it is a valid document
         oid = oletools.oleid.OleID(self.src_path)
@@ -275,33 +278,27 @@ class File(FileBase):
             try:
                 ole = olefile.OleFileIO(self.src_path, raise_defects=olefile.DEFECT_INCORRECT)
             except:
-                self.set_property('not_parsable', True)
-                self.make_dangerous()
+                self.make_dangerous('not parsable')
             if ole.parsing_issues:
-                self.set_property('parsing_issues', True)
-                self.make_dangerous()
+                self.make_dangerous('parsing issues')
             else:
                 if ole.exists('macros/vba') or ole.exists('Macros') \
                         or ole.exists('_VBA_PROJECT_CUR') or ole.exists('VBA'):
-                    self.set_property('macro', True)
-                    self.make_dangerous()
+                    self.make_dangerous('macro')
         else:
             indicators = oid.check()
-            # Encrypted ban be set by multiple checks on the script
+            # Encrypted can be set by multiple checks on the script
             if oid.encrypted.value:
-                self.set_property('encrypted', True)
-                self.make_dangerous()
+                self.make_dangerous('encrypted')
             if oid.macros.value or oid.ole.exists('macros/vba') or oid.ole.exists('Macros') \
                     or oid.ole.exists('_VBA_PROJECT_CUR') or oid.ole.exists('VBA'):
-                self.set_property('macro', True)
-                self.make_dangerous()
+                self.make_dangerous('macro')
             for i in indicators:
                 if i.id == 'ObjectPool' and i.value:
-                    # FIXME: Is it suspicious?
+                    # TODO: Is it suspicious?
                     self.set_property('objpool', True)
                 elif i.id == 'flash' and i.value:
-                    self.set_property('flash', True)
-                    self.make_dangerous()
+                    self.make_dangerous('flash')
 
     def _ooxml(self):
         """Processes an ooxml file."""
@@ -309,24 +306,19 @@ class File(FileBase):
         try:
             doc = officedissector.doc.Document(self.src_path)
         except Exception:
-            # Invalid file
-            self.make_dangerous()
+            self.make_dangerous('invalid ooxml file')
             return
         # There are probably other potentially malicious features:
         # fonts, custom props, custom XML
         if doc.is_macro_enabled or len(doc.features.macros) > 0:
-            self.set_property('macro', True)
-            self.make_dangerous()
+            self.make_dangerous('macro')
         if len(doc.features.embedded_controls) > 0:
-            self.set_property('activex', True)
-            self.make_dangerous()
+            self.make_dangerous('activex')
         if len(doc.features.embedded_objects) > 0:
             # Exploited by CVE-2014-4114 (OLE)
-            self.set_property('embedded_obj', True)
-            self.make_dangerous()
+            self.make_dangerous('embedded obj')
         if len(doc.features.embedded_packages) > 0:
-            self.set_property('embedded_pack', True)
-            self.make_dangerous()
+            self.make_dangerous('embedded pack')
 
     def _libreoffice(self):
         """Processes a libreoffice file."""
@@ -336,14 +328,12 @@ class File(FileBase):
             lodoc = zipfile.ZipFile(self.src_path, 'r')
         except:
             # TODO: are there specific exceptions we should catch here? Or is anything ok
-            self.set_property('invalid', True)
-            self.make_dangerous()
+            self.make_dangerous('invalid libreoffice file')
         for f in lodoc.infolist():
             fname = f.filename.lower()
             if fname.startswith('script') or fname.startswith('basic') or \
                     fname.startswith('object') or fname.endswith('.bin'):
-                self.set_property('macro', True)
-                self.make_dangerous()
+                self.make_dangerous('macro')
 
     def _pdf(self):
         """Processes a PDF file."""
@@ -352,20 +342,15 @@ class File(FileBase):
         oPDFiD = cPDFiD(xmlDoc, True)
         # TODO: are there other characteristics which should be dangerous?
         if oPDFiD.encrypt.count > 0:
-            self.set_property('encrypted', True)
-            self.make_dangerous()
+            self.make_dangerous('encrypted pdf')
         if oPDFiD.js.count > 0 or oPDFiD.javascript.count > 0:
-            self.set_property('javascript', True)
-            self.make_dangerous()
+            self.make_dangerous('pdf with javascript')
         if oPDFiD.aa.count > 0 or oPDFiD.openaction.count > 0:
-            self.set_property('openaction', True)
-            self.make_dangerous()
+            self.make_dangerous('openaction')
         if oPDFiD.richmedia.count > 0:
-            self.set_property('flash', True)
-            self.make_dangerous()
+            self.make_dangerous('flash')
         if oPDFiD.launch.count > 0:
-            self.set_property('launch', True)
-            self.make_dangerous()
+            self.make_dangerous('launch')
 
     def _archive(self):
         """Processes an archive using 7zip. The archive is extracted to a
@@ -394,16 +379,12 @@ class File(FileBase):
         try:
             tags = exifread.process_file(img, debug=True)
         except Exception as e:
-            # LOG: log instead of print
-            print("Error while trying to grab full metadata for file {}; retrying for partial data.".format(self.src_path))
-            print(e)
+            self.add_error(e, "Error while trying to grab full metadata for file {}; retrying for partial data.".format(self.src_path))
         if tags is None:
             try:
                 tags = exifread.process_file(img, debug=True)
             except Exception as e:
-                # LOG: log instead of print
-                print("Failed to get any metadata for file {}.".format(self.src_path))
-                print(e)
+                self.add_error(e, "Failed to get any metadata for file {}.".format(self.src_path))
                 img.close()
                 return False
 
@@ -436,10 +417,8 @@ class File(FileBase):
             img.close()
         # Catch decompression bombs
         except Exception as e:
-            # LOG: log instead of print
-            print("Caught exception processing metadata for {}".format(self.src_path))
-            print(e)
-            self.make_dangerous()
+            self.add_error(e, "Caught exception processing metadata for {}".format(self.src_path))
+            self.make_dangerous('exception processing metadata')
             return False
 
     def extract_metadata(self):
@@ -485,11 +464,9 @@ class File(FileBase):
             self.src_path = tempfile_path
         except Exception as e:  # Catch decompression bombs
             # TODO: change this from all Exceptions to specific DecompressionBombWarning
-            # LOG: change this from printing to logging
-            print("Caught exception (possible decompression bomb?) while translating file {}.".format(self.src_path))
-            print(e)
+            self.add_error(e, "Caught exception (possible decompression bomb?) while translating file {}.".format(self.src_path))
             self.make_dangerous()
-        self.log_string += 'Image file'
+        self.add_file_string('Image file')
         self.set_property('processing_type', 'image')
 
 
@@ -521,11 +498,10 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         file.check()
         if file.is_recursive:
             self.process_archive(file)
-        else:
-            # TODO: Make a File attribute for should be copied True/False and check it
-            self.safe_copy()
+        elif file.should_copy:
+            self.safe_copy(file.src_path, file.dst_path)
         file.write_log()
-        if hasattr(file, "tempdir_path"):
+        if hasattr(file, 'tempdir_path'):
             self.safe_rmtree(file.tempdir_path)
 
     def process_archive(self, file):
@@ -550,8 +526,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         self.recursive_archive_depth -= 1
 
     def _handle_archivebomb(self, file):
-        file.make_dangerous()
-        file.set_property('Archive Bomb', True)
+        file.make_dangerous('Archive bomb')
         self.logger.log.warning('ARCHIVE BOMB.')
         self.logger.log.warning('The content of the archive contains recursively other archives.')
         self.logger.log.warning('This is a bad sign so the archive is not extracted to the destination key.')
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index cf9e098..cef4c8d 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -44,15 +44,26 @@ class FileBase(object):
         self.filename = os.path.basename(self.src_path)
         self.log_string = ''
         self.logger = logger
+        self.symlink = None
         self.extension = self._determine_extension()
         self.mimetype = self._determine_mimetype()
+        self.should_copy = True
         if self.mimetype:
             self.main_type, self.sub_type = self._split_subtypes(self.mimetype)
-        self._file_props = {'filepath': self.src_path,
-                            'filename': self.filename,
-                            'maintype': self.main_type,
-                            'subtype': self.sub_type,
-                            'extension': self.extension}
+        self._file_props = {
+            'filepath': self.src_path,
+            'filename': self.filename,
+            'maintype': self.main_type,
+            'subtype': self.sub_type,
+            'extension': self.extension,
+            'file_size': self.size,
+            'safety_category': None,
+            'symlink': self.symlink,
+            'copied': False,
+            'file_string_set': set(),
+            'errors': {},
+            'user_defined': {}
+        }
 
     def _determine_extension(self):
         _, ext = os.path.splitext(self.src_path)
@@ -65,13 +76,14 @@ class FileBase(object):
         if os.path.islink(self.src_path):
             # magic will throw an IOError on a broken symlink
             mimetype = 'inode/symlink'
+            self.symlink = os.readlink(self.src_path)
         else:
             try:
                 mt = magic.from_file(self.src_path, mime=True)
                 # Note: magic will always return something, even if it's just 'data'
             except UnicodeEncodeError as e:
                 # FIXME: The encoding of the file is broken (possibly UTF-16)
-                # LOG: log this error
+                self.add_error(e, '')
                 mt = None
             try:
                 mimetype = mt.decode("utf-8")
@@ -86,10 +98,14 @@ class FileBase(object):
             main_type, sub_type = None, None
         return main_type, sub_type
 
+    @property
+    def size(self):
+        return os.path.getsize(self.src_path)
+
     def has_mimetype(self):
         """Returns True if file has a full mimetype, else False."""
         if not self.main_type or not self.sub_type:
-            # LOG: log this as an error
+            # LOG: log this as an error, move somewhere else?
             # self._file_props.update({'broken_mime': True})
             return False
         else:
@@ -98,7 +114,7 @@ class FileBase(object):
     def has_extension(self):
         """Returns True if self.extension is set, else False."""
         if self.extension is None:
-            # TODO: move this props updating to some other method
+            # TODO: do we actually want to have a seperate prop for no extension?
             # self.set_property('no_extension', True)
             return False
         else:
@@ -106,33 +122,46 @@ class FileBase(object):
 
     def is_dangerous(self):
         """True if file has been marked 'dangerous' else False."""
-        return ('dangerous' in self._file_props)
+        return self._file_props['safety_category'] is 'dangerous'
 
     def is_unknown(self):
         """True if file has been marked 'unknown' else False."""
-        return ('unknown' in self._file_props)
+        return self._file_props['safety_category'] is 'unknown'
 
     def is_binary(self):
         """True if file has been marked 'binary' else False."""
-        return ('binary' in self._file_props)
+        return self._file_props['safety_category'] is 'binary'
 
     def is_symlink(self):
         """Returns True and updates log if file is a symlink."""
-        if self.has_mimetype() and self.main_type == 'inode' and self.sub_type == 'symlink':
-            # TODO: move this props updating somewhere else
-            # self.set_property('symlink', os.readlink(self.src_path))
-            return True
-        else:
+        if self._file_props['symlink'] is None:
             return False
+        else:
+            return True
 
     def set_property(self, prop_string, value):
         """Takes a property + a value and adds them to self._file_props."""
-        self._file_props[prop_string] = value
+        if prop_string in self._file_props.keys():
+            self._file_props[prop_string] = value
+        else:
+            self._file_props['user_defined'][prop_string] = value
 
     def get_property(self, file_prop):
-        return self._file_props.get(file_prop, None)
+        # FIXME: needs to be refactored
+        if file_prop in self._file_props:
+            return self._file_props[file_prop]
+        elif file_prop in self._file_props['user_defined']:
+            return self._file_props['user_defined'][file_prop]
+        else:
+            return None
 
-    def make_dangerous(self):
+    def add_error(self, error, info):
+        self._file_props['errors'].update({error: info})
+
+    def add_file_string(self, file_string):
+        self._file_props['file_string_set'].add(file_string)
+
+    def make_dangerous(self, reason_string=None):
         """
         Marks a file as dangerous.
 
@@ -141,7 +170,8 @@ class FileBase(object):
         """
         if self.is_dangerous():
             return
-        self.set_property('dangerous', True)
+        self.set_property('safety_category', 'dangerous')
+        # LOG: store reason string
         path, filename = os.path.split(self.dst_path)
         self.dst_path = os.path.join(path, 'DANGEROUS_{}_DANGEROUS'.format(filename))
 
@@ -149,7 +179,7 @@ class FileBase(object):
         """Marks a file as an unknown type and prepends UNKNOWN to filename."""
         if self.is_dangerous() or self.is_binary():
             return
-        self.set_property('unknown', True)
+        self.set_property('safety_category', 'unknown')
         path, filename = os.path.split(self.dst_path)
         self.dst_path = os.path.join(path, 'UNKNOWN_{}'.format(filename))
 
@@ -157,7 +187,7 @@ class FileBase(object):
         """Marks a file as a binary and appends .bin to filename."""
         if self.is_dangerous():
             return
-        self.set_property('binary', True)
+        self.set_property('safety_category', 'binary')
         path, filename = os.path.split(self.dst_path)
         self.dst_path = os.path.join(path, '{}.bin'.format(filename))
 
@@ -185,7 +215,7 @@ class FileBase(object):
                 self.metadata_file_path = self.dst_path + ext
                 return self.metadata_file_path
         except KittenGroomerError as e:
-            # LOG: Write to log file
+            self.add_error(e, '')
             return False
 
 
@@ -281,11 +311,8 @@ class KittenGroomerBase(object):
             dst_path, filename = os.path.split(dst)
             self.safe_mkdir(dst_path)
             shutil.copy(src, dst)
-            return True
         except Exception as e:
-            # LOG: Logfile
-            print(e)
-            return False
+            self.add_error(e, '')
 
     def list_all_files(self, directory):
         """Generator yielding path to all of the files in a directory tree."""
diff --git a/tests/test_filecheck.py b/tests/test_filecheck.py
index b573a64..f187523 100644
--- a/tests/test_filecheck.py
+++ b/tests/test_filecheck.py
@@ -43,6 +43,9 @@ class TestIntegration:
         test_description = "filecheck_valid"
         save_logs(groomer, test_description)
 
+    def test_processdir(self):
+        pass
+
 
 class TestFileHandling:
     def test_autorun(self):
diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py
index 5866aaf..41f9a40 100644
--- a/tests/test_kittengroomer.py
+++ b/tests/test_kittengroomer.py
@@ -30,7 +30,7 @@ class TestFileBase:
         return FileBase(source_file, dest_file)
 
     @fixture
-    def symlink(self, tmpdir):
+    def symlink_file(self, tmpdir):
         file_path = tmpdir.join('test.txt')
         file_path.write('testing')
         file_path = file_path.strpath
@@ -65,7 +65,7 @@ class TestFileBase:
 
     @fixture
     def file_marked_binary(self, generic_conf_file):
-        generic_conf_file.mark_binary()
+        generic_conf_file.make_binary()
         return generic_conf_file
 
     @fixture(params=[
@@ -81,24 +81,17 @@ class TestFileBase:
     # What should FileBase do if it's given a path that isn't a file (doesn't exist or is a dir)? Currently magic throws an exception
     # We should probably catch everytime that happens and tell the user explicitly happened (and maybe put it in the log)
 
-    def test_create(self):
-        file = FileBase('tests/src_valid/blah.conf', '/tests/dst/blah.conf')
-
     def test_create_broken(self, tmpdir):
         with pytest.raises(TypeError):
-            file_no_args = FileBase()
+            FileBase()
         with pytest.raises(FileNotFoundError):
-            file_empty_args = FileBase('', '')
+            FileBase('', '')
         with pytest.raises(IsADirectoryError):
-            file_directory = FileBase(tmpdir.strpath, tmpdir.strpath)
-        # are there other cases here? path to a file that doesn't exist? permissions?
+            FileBase(tmpdir.strpath, tmpdir.strpath)
+        # TODO: are there other cases here? path to a file that doesn't exist? permissions?
 
     def test_init(self, generic_conf_file):
-        file = generic_conf_file
-        assert file.get_property('filepath') == file.src_path
-        assert file.get_property('extension') == '.conf'
-        # TODO: should we make log_details undeletable?
-        # TODO: we should probably check for more extensions here
+        generic_conf_file
 
     def test_extension_uppercase(self, tmpdir):
         file_path = tmpdir.join('TEST.TXT')
@@ -108,10 +101,10 @@ class TestFileBase:
         assert file.extension == '.txt'
 
     def test_mimetypes(self, generic_conf_file):
-        assert generic_conf_file.has_mimetype()
         assert generic_conf_file.mimetype == 'text/plain'
         assert generic_conf_file.main_type == 'text'
         assert generic_conf_file.sub_type == 'plain'
+        assert generic_conf_file.has_mimetype()
         # Need to test something without a mimetype
         # Need to test something that's a directory
         # Need to test something that causes the unicode exception
@@ -151,14 +144,14 @@ class TestFileBase:
         file_path = file_path.strpath
         symlink_path = tmpdir.join('symlinked.txt')
         symlink_path = symlink_path.strpath
-        file_symlink = os.symlink(file_path, symlink_path)
+        os.symlink(file_path, symlink_path)
         file = FileBase(file_path, file_path)
         symlink = FileBase(symlink_path, symlink_path)
         assert file.is_symlink() is False
         assert symlink.is_symlink() is True
 
-    def test_has_symlink_fixture(self, symlink):
-        assert symlink.is_symlink() is True
+    def test_has_symlink_fixture(self, symlink_file):
+        assert symlink_file.is_symlink() is True
 
     def test_generic_make_unknown(self, generic_conf_file):
         assert generic_conf_file.is_unknown() is False
@@ -187,11 +180,9 @@ class TestFileBase:
         if file.is_dangerous():
             file.make_binary()
             assert file.is_binary() is False
-            assert file.get_property('binary') is None
         else:
             file.make_binary()
             assert file.is_binary()
-            assert file.get_property('binary') is True
 
     def test_force_ext_change(self, generic_conf_file):
         assert generic_conf_file.has_extension()
@@ -254,7 +245,6 @@ class TestKittenGroomerBase:
         debug_groomer = KittenGroomerBase(source_directory,
                                           dest_directory,
                                           debug=True)
-        # we should maybe protect access to self.current_file in some way?
 
     def test_safe_copy(self, tmpdir):
         file = tmpdir.join('test.txt')
@@ -264,8 +254,8 @@ class TestKittenGroomerBase:
         filedest = testdir.join('test.txt')
         simple_groomer = KittenGroomerBase(tmpdir.strpath, testdir.strpath)
         simple_groomer.cur_file = FileBase(file.strpath, filedest.strpath)
-        assert simple_groomer.safe_copy() is True
-        #check that it handles weird file path inputs
+        simple_groomer.safe_copy()
+        # check that safe copy can handle weird file path inputs
 
     def test_list_all_files(self, tmpdir):
         file = tmpdir.join('test.txt')
@@ -276,7 +266,3 @@ class TestKittenGroomerBase:
         files = simple_groomer.list_all_files(simple_groomer.src_root_dir)
         assert file.strpath in files
         assert testdir.strpath not in files
-
-    def test_processdir(self, generic_groomer):
-        with pytest.raises(ImplementationRequired):
-            generic_groomer.processdir(None, None)

From 3fe8c7c223ac921dd596dbbd91b00425eca248fd Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 8 Mar 2017 23:06:20 -0500
Subject: [PATCH 27/36] Adjust order of property initialization

Tests were failing due to values being set before file_props dict
was created
---
 bin/filecheck.py         |  6 ------
 kittengroomer/helpers.py | 28 +++++++++++++++-------------
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 47f9c02..396d8b3 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -180,12 +180,6 @@ class File(FileBase):
         """Print the logs related to the current file being processed."""
         # LOG: move to helpers.py GroomerLogger and modify
         tmp_log = self.logger.log.fields(**self._file_props)
-        if self.is_dangerous():
-            tmp_log.warning(self.log_string)
-        elif self.is_unknown() or self.is_binary():
-            tmp_log.info(self.log_string)
-        else:
-            tmp_log.debug(self.log_string)
 
     @property
     def has_metadata(self):
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index cef4c8d..aca99f8 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -42,28 +42,29 @@ class FileBase(object):
         self.src_path = src_path
         self.dst_path = dst_path
         self.filename = os.path.basename(self.src_path)
-        self.log_string = ''
         self.logger = logger
-        self.symlink = None
-        self.extension = self._determine_extension()
-        self.mimetype = self._determine_mimetype()
-        self.should_copy = True
-        if self.mimetype:
-            self.main_type, self.sub_type = self._split_subtypes(self.mimetype)
         self._file_props = {
             'filepath': self.src_path,
             'filename': self.filename,
-            'maintype': self.main_type,
-            'subtype': self.sub_type,
-            'extension': self.extension,
             'file_size': self.size,
+            'maintype': None,
+            'subtype': None,
+            'extension': None,
             'safety_category': None,
-            'symlink': self.symlink,
+            'symlink': False,
             'copied': False,
             'file_string_set': set(),
             'errors': {},
             'user_defined': {}
         }
+        self.extension = self._determine_extension()
+        self.set_property('extension', self.extension)
+        self.mimetype = self._determine_mimetype()
+        self.should_copy = True
+        if self.mimetype:
+            self.main_type, self.sub_type = self._split_subtypes(self.mimetype)
+        self.set_property('maintype', self.main_type)
+        self.set_property('subtype', self.sub_type)
 
     def _determine_extension(self):
         _, ext = os.path.splitext(self.src_path)
@@ -76,13 +77,14 @@ class FileBase(object):
         if os.path.islink(self.src_path):
             # magic will throw an IOError on a broken symlink
             mimetype = 'inode/symlink'
-            self.symlink = os.readlink(self.src_path)
+            self.set_property('symlink', os.readlink(self.src_path))
         else:
             try:
                 mt = magic.from_file(self.src_path, mime=True)
                 # Note: magic will always return something, even if it's just 'data'
             except UnicodeEncodeError as e:
                 # FIXME: The encoding of the file is broken (possibly UTF-16)
+                # One of the Travis files will trigger this.
                 self.add_error(e, '')
                 mt = None
             try:
@@ -134,7 +136,7 @@ class FileBase(object):
 
     def is_symlink(self):
         """Returns True and updates log if file is a symlink."""
-        if self._file_props['symlink'] is None:
+        if self._file_props['symlink'] is False:
             return False
         else:
             return True

From 18857da7caa31a173906f984fc2963905eb67869 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 8 Mar 2017 23:22:53 -0500
Subject: [PATCH 28/36] Several small bugfixes

* Fix issue with main/subtypes in init
* Fix bug in File.check() in filecheck.py
* Fix FileBase.size for symlinks
---
 bin/filecheck.py         |  6 ++++--
 kittengroomer/helpers.py | 14 +++++++++++---
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 396d8b3..144aed9 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -162,8 +162,10 @@ class File(FileBase):
 
     def check(self):
         self._check_dangerous()
-        self._check_extension()
-        self._check_mimetype()
+        if self.has_extension():
+            self._check_extension()
+        if self.has_mimetype():
+            self._check_mimetype()
         if not self.is_dangerous():
             self.mime_processing_options.get(self.main_type, self.unknown)()
 
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index aca99f8..cf6309f 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -61,10 +61,14 @@ class FileBase(object):
         self.set_property('extension', self.extension)
         self.mimetype = self._determine_mimetype()
         self.should_copy = True
+        self.main_type = None
+        self.sub_type = None
         if self.mimetype:
             self.main_type, self.sub_type = self._split_subtypes(self.mimetype)
-        self.set_property('maintype', self.main_type)
-        self.set_property('subtype', self.sub_type)
+            if self.main_type:
+                self.set_property('maintype', self.main_type)
+            if self.sub_type:
+                self.set_property('subtype', self.sub_type)
 
     def _determine_extension(self):
         _, ext = os.path.splitext(self.src_path)
@@ -102,7 +106,11 @@ class FileBase(object):
 
     @property
     def size(self):
-        return os.path.getsize(self.src_path)
+        try:
+            size = os.path.getsize(self.src_path)
+        except FileNotFoundError:
+            size = 0
+        return size
 
     def has_mimetype(self):
         """Returns True if file has a full mimetype, else False."""

From 484c71fc860a1a435d070144a594acf03fb242e8 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Thu, 9 Mar 2017 01:41:42 -0500
Subject: [PATCH 29/36] Turn off copying for certain mimes in filecheck

---
 bin/filecheck.py         | 4 ++++
 kittengroomer/helpers.py | 1 +
 2 files changed, 5 insertions(+)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 144aed9..171b5ff 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -205,18 +205,22 @@ class File(FileBase):
             self.add_file_string('Symlink to {}'.format(symlink_path))
         else:
             self.add_file_string('Inode file')
+        self.should_copy = False
 
     def unknown(self):
         """Main type should never be unknown."""
         self.add_file_string('Unknown file')
+        self.should_copy = False
 
     def example(self):
         """Used in examples, should never be returned by libmagic."""
         self.add_file_string('Example file')
+        self.should_copy = False
 
     def multipart(self):
         """Used in web apps, should never be returned by libmagic"""
         self.add_file_string('Multipart file')
+        self.should_copy = False
 
     # ##### Treated as malicious, no reason to have it on a USB key ######
     def message(self):
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index cf6309f..a358813 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -321,6 +321,7 @@ class KittenGroomerBase(object):
             dst_path, filename = os.path.split(dst)
             self.safe_mkdir(dst_path)
             shutil.copy(src, dst)
+            self.set_property('copied', True)
         except Exception as e:
             self.add_error(e, '')
 

From e73721e95fc6e7f4f261685cdb559e77547f1510 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Thu, 9 Mar 2017 01:53:44 -0500
Subject: [PATCH 30/36] Fix bug with safe_copy

---
 bin/filecheck.py         | 1 +
 kittengroomer/helpers.py | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 171b5ff..acc160d 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -500,6 +500,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
             self.process_archive(file)
         elif file.should_copy:
             self.safe_copy(file.src_path, file.dst_path)
+            file.set_property('copied', True)
         file.write_log()
         if hasattr(file, 'tempdir_path'):
             self.safe_rmtree(file.tempdir_path)
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index a358813..cf6309f 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -321,7 +321,6 @@ class KittenGroomerBase(object):
             dst_path, filename = os.path.split(dst)
             self.safe_mkdir(dst_path)
             shutil.copy(src, dst)
-            self.set_property('copied', True)
         except Exception as e:
             self.add_error(e, '')
 

From 59cde8cfd5de1b5b097fd6c30a06e64d752cfbc0 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Thu, 9 Mar 2017 13:48:07 -0500
Subject: [PATCH 31/36] Move safe_copy to FileBase

---
 bin/filecheck.py         |  2 +-
 kittengroomer/helpers.py | 30 ++++++++++++++----------------
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index acc160d..157c83f 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -499,7 +499,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         if file.is_recursive:
             self.process_archive(file)
         elif file.should_copy:
-            self.safe_copy(file.src_path, file.dst_path)
+            file.safe_copy()
             file.set_property('copied', True)
         file.write_log()
         if hasattr(file, 'tempdir_path'):
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index cf6309f..f422be5 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -201,6 +201,20 @@ class FileBase(object):
         path, filename = os.path.split(self.dst_path)
         self.dst_path = os.path.join(path, '{}.bin'.format(filename))
 
+    def safe_copy(self, src=None, dst=None):
+        """Copy a file and create directory if needed."""
+        if src is None:
+            src = self.src_path
+        if dst is None:
+            dst = self.dst_path
+        try:
+            dst_path, filename = os.path.split(dst)
+            if not os.path.exists(dst_path):
+                os.makedirs(dst_path)
+            shutil.copy(src, dst)
+        except Exception as e:
+            self.add_error(e, '')
+
     def force_ext(self, ext):
         """If dst_path does not end in ext, changes it and edits _file_props."""
         if not self.dst_path.endswith(ext):
@@ -281,9 +295,6 @@ class GroomerLogger(object):
         # return a sublog for the file
         pass
 
-    def add_event(self, event, log_level):
-        pass
-
 
 class KittenGroomerBase(object):
     """Base object responsible for copy/sanitization process."""
@@ -311,19 +322,6 @@ class KittenGroomerBase(object):
         if not os.path.exists(directory):
             os.makedirs(directory)
 
-    def safe_copy(self, src=None, dst=None):
-        """Copy a file and create directory if needed."""
-        if src is None:
-            src = self.cur_file.src_path
-        if dst is None:
-            dst = self.cur_file.dst_path
-        try:
-            dst_path, filename = os.path.split(dst)
-            self.safe_mkdir(dst_path)
-            shutil.copy(src, dst)
-        except Exception as e:
-            self.add_error(e, '')
-
     def list_all_files(self, directory):
         """Generator yielding path to all of the files in a directory tree."""
         for root, dirs, files in os.walk(directory):

From 963a2feef4643b0d8334a7a0d9cc17d86e360d7a Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Fri, 10 Mar 2017 13:13:38 -0500
Subject: [PATCH 32/36] Change various methods to properties

---
 bin/filecheck.py            | 35 +++++++++------------
 kittengroomer/helpers.py    | 17 +++++++---
 tests/test_kittengroomer.py | 63 +++++++++++++++++--------------------
 3 files changed, 56 insertions(+), 59 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 157c83f..8af9c2c 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -120,9 +120,9 @@ class File(FileBase):
         }
 
     def _check_dangerous(self):
-        if not self.has_mimetype():
+        if not self.has_mimetype:
             self.make_dangerous('no mimetype')
-        if not self.has_extension():
+        if not self.has_extension:
             self.make_dangerous('no extension')
         if self.extension in Config.malicious_exts:
             self.make_dangerous('malicious_extension')
@@ -156,17 +156,17 @@ class File(FileBase):
         expected_extensions = mimetypes.guess_all_extensions(mimetype,
                                                              strict=False)
         if expected_extensions:
-            if self.has_extension() and self.extension not in expected_extensions:
+            if self.has_extension and self.extension not in expected_extensions:
                 # LOG: improve this string
                 self.make_dangerous('expected extensions')
 
     def check(self):
         self._check_dangerous()
-        if self.has_extension():
+        if self.has_extension:
             self._check_extension()
-        if self.has_mimetype():
+        if self.has_mimetype:
             self._check_mimetype()
-        if not self.is_dangerous():
+        if not self.is_dangerous:
             self.mime_processing_options.get(self.main_type, self.unknown)()
 
     # ##### Helper functions #####
@@ -178,11 +178,6 @@ class File(FileBase):
                 dict_to_return[subtype] = method
         return dict_to_return
 
-    def write_log(self):
-        """Print the logs related to the current file being processed."""
-        # LOG: move to helpers.py GroomerLogger and modify
-        tmp_log = self.logger.log.fields(**self._file_props)
-
     @property
     def has_metadata(self):
         if self.mimetype in Config.mimes_metadata:
@@ -200,7 +195,7 @@ class File(FileBase):
     # ##### Discarded mimetypes, reason in the docstring ######
     def inode(self):
         """Empty file or symlink."""
-        if self.is_symlink():
+        if self.is_symlink:
             symlink_path = self.get_property('symlink')
             self.add_file_string('Symlink to {}'.format(symlink_path))
         else:
@@ -479,19 +474,19 @@ class KittenGroomerFileCheck(KittenGroomerBase):
 
     def process_dir(self, src_dir, dst_dir):
         """Main function coordinating file processing."""
-        # LOG: we probably want to move this write_log elsewhere:
+        # LOG: what's the purpose of this write log?:
         # if self.recursive_archive_depth > 0:
         #     self.write_log()
-        # TODO: Can we clean up the way we handle relative_path?
+        # We're writing the log here because...
+        # How exactly does the workflow work with an archive?
         for srcpath in self.list_all_files(src_dir):
             dstpath = srcpath.replace(src_dir, dst_dir)
-            relative_path = srcpath.replace(src_dir + '/', '')
+            # TODO: Can we clean up the way we handle relative_path?
+            # Relative path is here so that when we print files in the log it
+            # shows only the file's path. Should we just pass it to the logger
+            # when we create it? Or let the logger figure it out?
+            # relative_path = srcpath.replace(src_dir + '/', '')
             self.cur_file = File(srcpath, dstpath, self.logger)
-            # LOG: move this logging code elsewhere
-            self.logger.log.info('Processing {} ({}/{})',
-                                 relative_path,
-                                 self.cur_file.main_type,
-                                 self.cur_file.sub_type)
             self.process_file(self.cur_file)
 
     def process_file(self, file):
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index f422be5..9f80b72 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -112,6 +112,7 @@ class FileBase(object):
             size = 0
         return size
 
+    @property
     def has_mimetype(self):
         """Returns True if file has a full mimetype, else False."""
         if not self.main_type or not self.sub_type:
@@ -121,6 +122,7 @@ class FileBase(object):
         else:
             return True
 
+    @property
     def has_extension(self):
         """Returns True if self.extension is set, else False."""
         if self.extension is None:
@@ -130,18 +132,22 @@ class FileBase(object):
         else:
             return True
 
+    @property
     def is_dangerous(self):
         """True if file has been marked 'dangerous' else False."""
         return self._file_props['safety_category'] is 'dangerous'
 
+    @property
     def is_unknown(self):
         """True if file has been marked 'unknown' else False."""
         return self._file_props['safety_category'] is 'unknown'
 
+    @property
     def is_binary(self):
         """True if file has been marked 'binary' else False."""
         return self._file_props['safety_category'] is 'binary'
 
+    @property
     def is_symlink(self):
         """Returns True and updates log if file is a symlink."""
         if self._file_props['symlink'] is False:
@@ -178,7 +184,7 @@ class FileBase(object):
         Prepends and appends DANGEROUS to the destination file name
         to help prevent double-click of death.
         """
-        if self.is_dangerous():
+        if self.is_dangerous:
             return
         self.set_property('safety_category', 'dangerous')
         # LOG: store reason string
@@ -187,7 +193,7 @@ class FileBase(object):
 
     def make_unknown(self):
         """Marks a file as an unknown type and prepends UNKNOWN to filename."""
-        if self.is_dangerous() or self.is_binary():
+        if self.is_dangerous or self.is_binary:
             return
         self.set_property('safety_category', 'unknown')
         path, filename = os.path.split(self.dst_path)
@@ -195,7 +201,7 @@ class FileBase(object):
 
     def make_binary(self):
         """Marks a file as a binary and appends .bin to filename."""
-        if self.is_dangerous():
+        if self.is_dangerous:
             return
         self.set_property('safety_category', 'binary')
         path, filename = os.path.split(self.dst_path)
@@ -242,6 +248,10 @@ class FileBase(object):
             self.add_error(e, '')
             return False
 
+    def write_log(self):
+        """Print the logs related to the current file being processed."""
+        tmp_log = self.logger.log.fields(**self._file_props)
+
 
 class GroomerLogger(object):
     """Groomer logging interface"""
@@ -291,7 +301,6 @@ class GroomerLogger(object):
         return s.hexdigest()
 
     def add_file(self, file):
-        # File object will add itself?
         # return a sublog for the file
         pass
 
diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py
index 41f9a40..940136d 100644
--- a/tests/test_kittengroomer.py
+++ b/tests/test_kittengroomer.py
@@ -104,23 +104,23 @@ class TestFileBase:
         assert generic_conf_file.mimetype == 'text/plain'
         assert generic_conf_file.main_type == 'text'
         assert generic_conf_file.sub_type == 'plain'
-        assert generic_conf_file.has_mimetype()
+        assert generic_conf_file.has_mimetype
         # Need to test something without a mimetype
         # Need to test something that's a directory
         # Need to test something that causes the unicode exception
 
     def test_has_mimetype_no_main_type(self, generic_conf_file):
         generic_conf_file.main_type = ''
-        assert generic_conf_file.has_mimetype() is False
+        assert generic_conf_file.has_mimetype is False
 
     def test_has_mimetype_no_sub_type(self, generic_conf_file):
         generic_conf_file.sub_type = ''
-        assert generic_conf_file.has_mimetype() is False
+        assert generic_conf_file.has_mimetype is False
 
     def test_has_extension(self, temp_file, temp_file_no_ext):
-        assert temp_file.has_extension() is True
+        assert temp_file.has_extension is True
         print(temp_file_no_ext.extension)
-        assert temp_file_no_ext.has_extension() is False
+        assert temp_file_no_ext.has_extension is False
 
     def test_set_property(self, generic_conf_file):
         generic_conf_file.set_property('test', True)
@@ -129,14 +129,14 @@ class TestFileBase:
 
     def test_marked_dangerous(self, file_marked_all_parameterized):
         file_marked_all_parameterized.make_dangerous()
-        assert file_marked_all_parameterized.is_dangerous() is True
+        assert file_marked_all_parameterized.is_dangerous is True
         # Should work regardless of weird paths??
         # Should check file path alteration behavior as well
 
     def test_generic_dangerous(self, generic_conf_file):
-        assert generic_conf_file.is_dangerous() is False
+        assert generic_conf_file.is_dangerous is False
         generic_conf_file.make_dangerous()
-        assert generic_conf_file.is_dangerous() is True
+        assert generic_conf_file.is_dangerous is True
 
     def test_has_symlink(self, tmpdir):
         file_path = tmpdir.join('test.txt')
@@ -147,45 +147,45 @@ class TestFileBase:
         os.symlink(file_path, symlink_path)
         file = FileBase(file_path, file_path)
         symlink = FileBase(symlink_path, symlink_path)
-        assert file.is_symlink() is False
-        assert symlink.is_symlink() is True
+        assert file.is_symlink is False
+        assert symlink.is_symlink is True
 
     def test_has_symlink_fixture(self, symlink_file):
-        assert symlink_file.is_symlink() is True
+        assert symlink_file.is_symlink is True
 
     def test_generic_make_unknown(self, generic_conf_file):
-        assert generic_conf_file.is_unknown() is False
+        assert generic_conf_file.is_unknown is False
         generic_conf_file.make_unknown()
-        assert generic_conf_file.is_unknown()
+        assert generic_conf_file.is_unknown
         # given a FileBase object with no marking, should do the right things
 
     def test_marked_make_unknown(self, file_marked_all_parameterized):
         file = file_marked_all_parameterized
-        if file.is_unknown():
+        if file.is_unknown:
             file.make_unknown()
-            assert file.is_unknown()
+            assert file.is_unknown
         else:
-            assert file.is_unknown() is False
+            assert file.is_unknown is False
             file.make_unknown()
-            assert file.is_unknown() is False
+            assert file.is_unknown is False
         # given a FileBase object with an unrecognized marking, should ???
 
     def test_generic_make_binary(self, generic_conf_file):
-        assert generic_conf_file.is_binary() is False
+        assert generic_conf_file.is_binary is False
         generic_conf_file.make_binary()
-        assert generic_conf_file.is_binary() is True
+        assert generic_conf_file.is_binary
 
     def test_marked_make_binary(self, file_marked_all_parameterized):
         file = file_marked_all_parameterized
-        if file.is_dangerous():
+        if file.is_dangerous:
             file.make_binary()
-            assert file.is_binary() is False
+            assert file.is_binary is False
         else:
             file.make_binary()
-            assert file.is_binary()
+            assert file.is_binary
 
     def test_force_ext_change(self, generic_conf_file):
-        assert generic_conf_file.has_extension()
+        assert generic_conf_file.has_extension
         assert generic_conf_file.get_property('extension') == '.conf'
         assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
         generic_conf_file.force_ext('.txt')
@@ -195,7 +195,7 @@ class TestFileBase:
         # should be able to handle weird paths
 
     def test_force_ext_correct(self, generic_conf_file):
-        assert generic_conf_file.has_extension()
+        assert generic_conf_file.has_extension
         assert generic_conf_file.get_property('extension') == '.conf'
         generic_conf_file.force_ext('.conf')
         assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
@@ -212,6 +212,10 @@ class TestFileBase:
         # if metadata file already exists
         # if there is no metadata to write should this work?
 
+    def test_safe_copy(self, generic_conf_file):
+        generic_conf_file.safe_copy()
+        # check that safe copy can handle weird file path inputs
+
 
 class TestLogger:
 
@@ -246,17 +250,6 @@ class TestKittenGroomerBase:
                                           dest_directory,
                                           debug=True)
 
-    def test_safe_copy(self, tmpdir):
-        file = tmpdir.join('test.txt')
-        file.write('testing')
-        testdir = tmpdir.join('testdir')
-        os.mkdir(testdir.strpath)
-        filedest = testdir.join('test.txt')
-        simple_groomer = KittenGroomerBase(tmpdir.strpath, testdir.strpath)
-        simple_groomer.cur_file = FileBase(file.strpath, filedest.strpath)
-        simple_groomer.safe_copy()
-        # check that safe copy can handle weird file path inputs
-
     def test_list_all_files(self, tmpdir):
         file = tmpdir.join('test.txt')
         file.write('testing')

From 0175ee48e594a77fd0bd115171b9c13c494d8119 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Tue, 14 Mar 2017 10:41:31 -0400
Subject: [PATCH 33/36] Add TODOs and clarify various logging messages

---
 bin/filecheck.py         | 57 +++++++++++++++++-----------------------
 kittengroomer/helpers.py | 20 +++++++-------
 2 files changed, 33 insertions(+), 44 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 8af9c2c..91b494e 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -143,7 +143,7 @@ class File(FileBase):
         is_known_extension = self.extension in mimetypes.types_map.keys()
         if is_known_extension and expected_mimetype != self.mimetype:
             # LOG: improve this string
-            self.make_dangerous('expected_mimetyped')
+            self.make_dangerous('expected_mimetype')
 
     def _check_mimetype(self):
         """Takes the mimetype (as determined by libmagic) and determines
@@ -249,7 +249,8 @@ class File(FileBase):
         """Processes an application specific file according to its subtype."""
         for subtype, method in self.app_subtype_methods.items():
             if subtype in self.sub_type:
-                # TODO: should this return a value?
+                # TODO: should we change the logic so we don't iterate through all of the subtype methods?
+                # TODO: should these methods return a value?
                 method()
                 self.add_file_string('Application file')
                 return
@@ -258,16 +259,15 @@ class File(FileBase):
 
     def _executables(self):
         """Processes an executable file."""
-        # LOG: change this property
+        # LOG: change the processing_type property to some other name or include in file_string
         self.set_property('processing_type', 'executable')
         self.make_dangerous('executable')
 
     def _winoffice(self):
         """Processes a winoffice file using olefile/oletools."""
-        # LOG: change this property
+        # LOG: processing_type property
         self.set_property('processing_type', 'WinOffice')
-        # Try as if it is a valid document
-        oid = oletools.oleid.OleID(self.src_path)
+        oid = oletools.oleid.OleID(self.src_path)  # First assume a valid file
         if not olefile.isOleFile(self.src_path):
             # Manual processing, may already count as suspicious
             try:
@@ -291,12 +291,14 @@ class File(FileBase):
             for i in indicators:
                 if i.id == 'ObjectPool' and i.value:
                     # TODO: Is it suspicious?
+                    # LOG: user defined property
                     self.set_property('objpool', True)
                 elif i.id == 'flash' and i.value:
                     self.make_dangerous('flash')
 
     def _ooxml(self):
         """Processes an ooxml file."""
+        # LOG: processing_type property
         self.set_property('processing_type', 'ooxml')
         try:
             doc = officedissector.doc.Document(self.src_path)
@@ -318,7 +320,7 @@ class File(FileBase):
     def _libreoffice(self):
         """Processes a libreoffice file."""
         self.set_property('processing_type', 'libreoffice')
-        # As long as there ar no way to do a sanity check on the files => dangerous
+        # As long as there is no way to do a sanity check on the files => dangerous
         try:
             lodoc = zipfile.ZipFile(self.src_path, 'r')
         except:
@@ -332,6 +334,7 @@ class File(FileBase):
 
     def _pdf(self):
         """Processes a PDF file."""
+        # LOG: processing_type property
         self.set_property('processing_type', 'pdf')
         xmlDoc = PDFiD(self.src_path)
         oPDFiD = cPDFiD(xmlDoc, True)
@@ -352,9 +355,10 @@ class File(FileBase):
         temporary directory and self.process_dir is called on that directory.
         The recursive archive depth is increased to protect against archive
         bombs."""
+        # LOG: change this to something archive specific
         self.set_property('processing_type', 'archive')
+        self.should_copy = False
         self.is_recursive = True
-        # self.log_string += 'Archive extracted, processing content.'
 
     def _unknown_app(self):
         """Processes an unknown file."""
@@ -367,10 +371,9 @@ class File(FileBase):
     #######################
     # Metadata extractors
     def _metadata_exif(self, metadata_file_path):
-        # TODO: this method is kind of long, can we shorten it?
+        # TODO: this method is kind of long, can we shorten it somehow?
         img = open(self.src_path, 'rb')
         tags = None
-
         try:
             tags = exifread.process_file(img, debug=True)
         except Exception as e:
@@ -382,19 +385,17 @@ class File(FileBase):
                 self.add_error(e, "Failed to get any metadata for file {}.".format(self.src_path))
                 img.close()
                 return False
-
         for tag in sorted(tags.keys()):
-            # These are long and obnoxious/binary
+            # These tags are long and obnoxious/binary so we don't add them
             if tag not in ('JPEGThumbnail', 'TIFFThumbnail'):
-                printable = str(tags[tag])
-
+                tag_string = str(tags[tag])
                 # Exifreader truncates data.
-                if len(printable) > 25 and printable.endswith(", ... ]"):
-                    value = tags[tag].values
-                    printable = str(value)
-
+                if len(tag_string) > 25 and tag_string.endswith(", ... ]"):
+                    tag_value = tags[tag].values
+                    tag_string = str(tag_value)
                 with open(metadata_file_path, 'w+') as metadata_file:
-                    metadata_file.write("Key: {}\tValue: {}\n".format(tag, printable))
+                    metadata_file.write("Key: {}\tValue: {}\n".format(tag, tag_string))
+        # LOG: how do we want to log metadata?
         self.set_property('metadata', 'exif')
         img.close()
         return True
@@ -408,10 +409,12 @@ class File(FileBase):
                 if tag not in ('icc_profile'):
                     with open(metadata_file_path, 'w+') as metadata_file:
                         metadata_file.write("Key: {}\tValue: {}\n".format(tag, img.info[tag]))
+            # LOG: handle metadata
             self.set_property('metadata', 'png')
             img.close()
         # Catch decompression bombs
         except Exception as e:
+            # TODO: only catch DecompressionBombWarnings here?
             self.add_error(e, "Caught exception processing metadata for {}".format(self.src_path))
             self.make_dangerous('exception processing metadata')
             return False
@@ -474,11 +477,6 @@ class KittenGroomerFileCheck(KittenGroomerBase):
 
     def process_dir(self, src_dir, dst_dir):
         """Main function coordinating file processing."""
-        # LOG: what's the purpose of this write log?:
-        # if self.recursive_archive_depth > 0:
-        #     self.write_log()
-        # We're writing the log here because...
-        # How exactly does the workflow work with an archive?
         for srcpath in self.list_all_files(src_dir):
             dstpath = srcpath.replace(src_dir, dst_dir)
             # TODO: Can we clean up the way we handle relative_path?
@@ -506,9 +504,9 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         Should be given a Kittengroomer file object whose src_path points
         to an archive."""
         self.recursive_archive_depth += 1
-        # Check for archivebomb
+        # LOG: write_log or somehow log the archive file here
         if self.recursive_archive_depth >= self.max_recursive_depth:
-            self._handle_archivebomb(file)
+            file.make_dangerous('Archive bomb')
         else:
             tempdir_path = file.make_tempdir()
             command_str = '{} -p1 x "{}" -o"{}" -bd -aoa'
@@ -521,13 +519,6 @@ class KittenGroomerFileCheck(KittenGroomerBase):
             self.safe_rmtree(tempdir_path)
         self.recursive_archive_depth -= 1
 
-    def _handle_archivebomb(self, file):
-        file.make_dangerous('Archive bomb')
-        self.logger.log.warning('ARCHIVE BOMB.')
-        self.logger.log.warning('The content of the archive contains recursively other archives.')
-        self.logger.log.warning('This is a bad sign so the archive is not extracted to the destination key.')
-        # TODO: delete whatever we want to delete that's already been copied to dest dir
-
     def _run_process(self, command_string, timeout=None):
         """Run command_string in a subprocess, wait until it finishes."""
         args = shlex.split(command_string)
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 9f80b72..465ad4a 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -88,7 +88,7 @@ class FileBase(object):
                 # Note: magic will always return something, even if it's just 'data'
             except UnicodeEncodeError as e:
                 # FIXME: The encoding of the file is broken (possibly UTF-16)
-                # One of the Travis files will trigger this.
+                # Note: one of the Travis files will trigger this exception
                 self.add_error(e, '')
                 mt = None
             try:
@@ -115,9 +115,9 @@ class FileBase(object):
     @property
     def has_mimetype(self):
         """Returns True if file has a full mimetype, else False."""
+        # TODO: broken mimetype checks should be done somewhere else.
+        # Should the check be by default or should we let the API consumer write it?
         if not self.main_type or not self.sub_type:
-            # LOG: log this as an error, move somewhere else?
-            # self._file_props.update({'broken_mime': True})
             return False
         else:
             return True
@@ -126,8 +126,6 @@ class FileBase(object):
     def has_extension(self):
         """Returns True if self.extension is set, else False."""
         if self.extension is None:
-            # TODO: do we actually want to have a seperate prop for no extension?
-            # self.set_property('no_extension', True)
             return False
         else:
             return True
@@ -163,7 +161,7 @@ class FileBase(object):
             self._file_props['user_defined'][prop_string] = value
 
     def get_property(self, file_prop):
-        # FIXME: needs to be refactored
+        # TODO: could probably be refactored
         if file_prop in self._file_props:
             return self._file_props[file_prop]
         elif file_prop in self._file_props['user_defined']:
@@ -187,7 +185,7 @@ class FileBase(object):
         if self.is_dangerous:
             return
         self.set_property('safety_category', 'dangerous')
-        # LOG: store reason string
+        # LOG: store reason string somewhere and do something with it
         path, filename = os.path.split(self.dst_path)
         self.dst_path = os.path.join(path, 'DANGEROUS_{}_DANGEROUS'.format(filename))
 
@@ -250,11 +248,12 @@ class FileBase(object):
 
     def write_log(self):
         """Print the logs related to the current file being processed."""
-        tmp_log = self.logger.log.fields(**self._file_props)
+        file_log = self.logger.add_file(self)
+        file_log.fields(**self._file_props)
 
 
 class GroomerLogger(object):
-    """Groomer logging interface"""
+    """Groomer logging interface."""
 
     def __init__(self, root_dir_path, debug=False):
         self.root_dir = root_dir_path
@@ -301,8 +300,7 @@ class GroomerLogger(object):
         return s.hexdigest()
 
     def add_file(self, file):
-        # return a sublog for the file
-        pass
+        return self.log.name('file.src_path')
 
 
 class KittenGroomerBase(object):

From ac94cf5d6d40673e43cde8ff716f402a400d4321 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Tue, 14 Mar 2017 19:53:23 -0400
Subject: [PATCH 34/36] Change the way test dst dirs are handled

* Each test folder now copies files into its own test directory
* Change gitignore due to dst dir changes
* Make sure logger.tree is called for every directory
---
 .gitignore              |  4 ++++
 bin/filecheck.py        |  5 +++--
 tests/dst/.keepdir      |  0
 tests/test_filecheck.py | 25 +++++++++++++++++++------
 4 files changed, 26 insertions(+), 8 deletions(-)
 delete mode 100644 tests/dst/.keepdir

diff --git a/.gitignore b/.gitignore
index ecf6be3..3f07002 100644
--- a/.gitignore
+++ b/.gitignore
@@ -68,7 +68,11 @@ target/
 
 # Project specific
 tests/dst/*
+tests/*_dst
 tests/test_logs/*
 !tests/**/.keepdir
 !tests/src_invalid/*
 !tests/src_valid/*
+pdfid.py
+# Plugins are pdfid stuff
+plugin_*
diff --git a/bin/filecheck.py b/bin/filecheck.py
index 91b494e..6513442 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -477,6 +477,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
 
     def process_dir(self, src_dir, dst_dir):
         """Main function coordinating file processing."""
+        self.logger.tree(src_dir)
         for srcpath in self.list_all_files(src_dir):
             dstpath = srcpath.replace(src_dir, dst_dir)
             # TODO: Can we clean up the way we handle relative_path?
@@ -509,12 +510,12 @@ class KittenGroomerFileCheck(KittenGroomerBase):
             file.make_dangerous('Archive bomb')
         else:
             tempdir_path = file.make_tempdir()
+            # TODO: double check we are properly escaping file.src_path
+            # otherwise we are running unvalidated user input directly in the shell
             command_str = '{} -p1 x "{}" -o"{}" -bd -aoa'
             unpack_command = command_str.format(SEVENZ_PATH,
                                                 file.src_path, tempdir_path)
             self._run_process(unpack_command)
-            # LOG: check that tree is working correctly here
-            self.logger.tree(tempdir_path)
             self.process_dir(tempdir_path, file.dst_path)
             self.safe_rmtree(tempdir_path)
         self.recursive_archive_depth -= 1
diff --git a/tests/dst/.keepdir b/tests/dst/.keepdir
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/test_filecheck.py b/tests/test_filecheck.py
index f187523..f58152d 100644
--- a/tests/test_filecheck.py
+++ b/tests/test_filecheck.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import os
+import shutil
 
 import pytest
 
@@ -20,25 +21,27 @@ skipif_nodeps = pytest.mark.skipif(NODEPS,
 class TestIntegration:
 
     @pytest.fixture
-    def src_valid(self):
+    def src_valid_path(self):
         return os.path.join(os.getcwd(), 'tests/src_valid')
 
     @pytest.fixture
-    def src_invalid(self):
+    def src_invalid_path(self):
         return os.path.join(os.getcwd(), 'tests/src_invalid')
 
     @pytest.fixture
     def dst(self):
         return os.path.join(os.getcwd(), 'tests/dst')
 
-    def test_filecheck(self, src_invalid, dst):
-        groomer = KittenGroomerFileCheck(src_invalid, dst, debug=True)
+    def test_filecheck_src_invalid(self, src_invalid_path):
+        dst_path = self.make_dst_dir_path(src_invalid_path)
+        groomer = KittenGroomerFileCheck(src_invalid_path, dst_path, debug=True)
         groomer.run()
         test_description = "filecheck_invalid"
         save_logs(groomer, test_description)
 
-    def test_filecheck_2(self, src_valid, dst):
-        groomer = KittenGroomerFileCheck(src_valid, dst, debug=True)
+    def test_filecheck_2(self, src_valid_path):
+        dst_path = self.make_dst_dir_path(src_valid_path)
+        groomer = KittenGroomerFileCheck(src_valid_path, dst_path, debug=True)
         groomer.run()
         test_description = "filecheck_valid"
         save_logs(groomer, test_description)
@@ -46,8 +49,18 @@ class TestIntegration:
     def test_processdir(self):
         pass
 
+    def test_handle_archives(self):
+        pass
+
+    def make_dst_dir_path(self, src_dir_path):
+        dst_path = src_dir_path + '_dst'
+        shutil.rmtree(dst_path, ignore_errors=True)
+        os.makedirs(dst_path, exist_ok=True)
+        return dst_path
+
 
 class TestFileHandling:
     def test_autorun(self):
         # Run on a single autorun file, confirm that it gets flagged as dangerous
+        # TODO: build out these and other methods for individual file cases
         pass

From 4d8a1d1dafd7ef453e5a8653215c7ab214edcad6 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 15 Mar 2017 22:29:51 -0400
Subject: [PATCH 35/36] Add/update docstrings for filecheck and helpers

---
 bin/filecheck.py         | 64 ++++++++++++++++++++++++++--------------
 kittengroomer/helpers.py | 62 +++++++++++++++++++++++---------------
 2 files changed, 80 insertions(+), 46 deletions(-)

diff --git a/bin/filecheck.py b/bin/filecheck.py
index 6513442..3dc26dd 100644
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -23,6 +23,8 @@ SEVENZ_PATH = '/usr/bin/7z'
 
 
 class Config:
+    """Configuration information for Filecheck."""
+
     # Application subtypes (mimetype: 'application/<subtype>')
     mimes_ooxml = ['vnd.openxmlformats-officedocument.']
     mimes_office = ['msword', 'vnd.ms-']
@@ -180,12 +182,13 @@ class File(FileBase):
 
     @property
     def has_metadata(self):
+        """True if filetype typically contains metadata, else False."""
         if self.mimetype in Config.mimes_metadata:
             return True
         return False
 
     def make_tempdir(self):
-        """Make a temporary directory."""
+        """Make a temporary directory at self.tempdir_path."""
         self.tempdir_path = self.dst_path + '_temp'
         if not os.path.exists(self.tempdir_path):
             os.makedirs(self.tempdir_path)
@@ -246,7 +249,7 @@ class File(FileBase):
         self.force_ext('.txt')
 
     def application(self):
-        """Processes an application specific file according to its subtype."""
+        """Process an application specific file according to its subtype."""
         for subtype, method in self.app_subtype_methods.items():
             if subtype in self.sub_type:
                 # TODO: should we change the logic so we don't iterate through all of the subtype methods?
@@ -258,13 +261,13 @@ class File(FileBase):
         self._unknown_app()
 
     def _executables(self):
-        """Processes an executable file."""
+        """Process an executable file."""
         # LOG: change the processing_type property to some other name or include in file_string
         self.set_property('processing_type', 'executable')
         self.make_dangerous('executable')
 
     def _winoffice(self):
-        """Processes a winoffice file using olefile/oletools."""
+        """Process a winoffice file using olefile/oletools."""
         # LOG: processing_type property
         self.set_property('processing_type', 'WinOffice')
         oid = oletools.oleid.OleID(self.src_path)  # First assume a valid file
@@ -297,7 +300,7 @@ class File(FileBase):
                     self.make_dangerous('flash')
 
     def _ooxml(self):
-        """Processes an ooxml file."""
+        """Process an ooxml file."""
         # LOG: processing_type property
         self.set_property('processing_type', 'ooxml')
         try:
@@ -318,7 +321,7 @@ class File(FileBase):
             self.make_dangerous('embedded pack')
 
     def _libreoffice(self):
-        """Processes a libreoffice file."""
+        """Process a libreoffice file."""
         self.set_property('processing_type', 'libreoffice')
         # As long as there is no way to do a sanity check on the files => dangerous
         try:
@@ -333,7 +336,7 @@ class File(FileBase):
                 self.make_dangerous('macro')
 
     def _pdf(self):
-        """Processes a PDF file."""
+        """Process a PDF file."""
         # LOG: processing_type property
         self.set_property('processing_type', 'pdf')
         xmlDoc = PDFiD(self.src_path)
@@ -351,26 +354,30 @@ class File(FileBase):
             self.make_dangerous('launch')
 
     def _archive(self):
-        """Processes an archive using 7zip. The archive is extracted to a
-        temporary directory and self.process_dir is called on that directory.
-        The recursive archive depth is increased to protect against archive
-        bombs."""
+        """
+        Process an archive using 7zip.
+
+        The archive is extracted to a temporary directory and self.process_dir
+        is called on that directory. The recursive archive depth is increased
+        to protect against archive bombs.
+        """
         # LOG: change this to something archive specific
         self.set_property('processing_type', 'archive')
         self.should_copy = False
         self.is_recursive = True
 
     def _unknown_app(self):
-        """Processes an unknown file."""
+        """Process an unknown file."""
         self.make_unknown()
 
     def _binary_app(self):
-        """Processses an unknown binary file."""
+        """Process an unknown binary file."""
         self.make_binary()
 
     #######################
     # Metadata extractors
     def _metadata_exif(self, metadata_file_path):
+        """Read exif metadata from a jpg or tiff file using exifread."""
         # TODO: this method is kind of long, can we shorten it somehow?
         img = open(self.src_path, 'rb')
         tags = None
@@ -401,6 +408,7 @@ class File(FileBase):
         return True
 
     def _metadata_png(self, metadata_file_path):
+        """Extract metadata from a png file using PIL/Pillow."""
         warnings.simplefilter('error', Image.DecompressionBombWarning)
         try:
             img = Image.open(self.src_path)
@@ -420,6 +428,7 @@ class File(FileBase):
             return False
 
     def extract_metadata(self):
+        """Create metadata file and call correct metadata extraction method."""
         metadata_file_path = self.create_metadata_file(".metadata.txt")
         mt = self.mimetype
         metadata_processing_method = self.metadata_mimetype_methods.get(mt)
@@ -430,12 +439,12 @@ class File(FileBase):
     #######################
     # ##### Media - audio and video aren't converted ######
     def audio(self):
-        """Processes an audio file."""
+        """Process an audio file."""
         self.log_string += 'Audio file'
         self._media_processing()
 
     def video(self):
-        """Processes a video."""
+        """Process a video."""
         self.log_string += 'Video file'
         self._media_processing()
 
@@ -444,11 +453,14 @@ class File(FileBase):
         self.set_property('processing_type', 'media')
 
     def image(self):
-        """Processes an image.
+        """
+        Process an image.
 
-        Extracts metadata to dest key if metadata is present. Creates a
-        temporary directory on dest key, opens the using PIL.Image,saves it to
-        the temporary directory, and copies it to the destination."""
+        Extracts metadata to dest key using self.extract_metada() if metadata
+        is present. Creates a temporary directory on dest key, opens the image
+        using PIL.Image, saves it to the temporary directory, and copies it to
+        the destination.
+        """
         # TODO: make sure this method works for png, gif, tiff
         if self.has_metadata:
             self.extract_metadata()
@@ -476,7 +488,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
         self.max_recursive_depth = max_recursive_depth
 
     def process_dir(self, src_dir, dst_dir):
-        """Main function coordinating file processing."""
+        """Process a directory on the source key."""
         self.logger.tree(src_dir)
         for srcpath in self.list_all_files(src_dir):
             dstpath = srcpath.replace(src_dir, dst_dir)
@@ -489,6 +501,12 @@ class KittenGroomerFileCheck(KittenGroomerBase):
             self.process_file(self.cur_file)
 
     def process_file(self, file):
+        """
+        Process an individual file.
+
+        Check the file, handle archives using self.process_archive, copy
+        the file to the destionation key, and clean up temporary directory.
+        """
         file.check()
         if file.is_recursive:
             self.process_archive(file)
@@ -500,10 +518,12 @@ class KittenGroomerFileCheck(KittenGroomerBase):
             self.safe_rmtree(file.tempdir_path)
 
     def process_archive(self, file):
-        """Unpacks an archive using 7zip and processes contents.
+        """
+        Unpack an archive using 7zip and process contents using process_dir.
 
         Should be given a Kittengroomer file object whose src_path points
-        to an archive."""
+        to an archive.
+        """
         self.recursive_archive_depth += 1
         # LOG: write_log or somehow log the archive file here
         if self.recursive_archive_depth >= self.max_recursive_depth:
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 465ad4a..318f1ca 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -32,13 +32,18 @@ class ImplementationRequired(KittenGroomerError):
 
 class FileBase(object):
     """
-    Base object for individual files in the source directory. Contains file
-    attributes and various helper methods. Subclass and add attributes
-    or methods relevant to a given implementation.
+    Base object for individual files in the source directory.
+
+    Contains file attributes and various helper methods.
     """
 
     def __init__(self, src_path, dst_path, logger=None):
-        """Initialized with the source path and expected destination path."""
+        """
+        Initialized with the source path and expected destination path.
+
+        self.logger should be a logging object with an add_file method.
+        Create various properties and determine the file's mimetype.
+        """
         self.src_path = src_path
         self.dst_path = dst_path
         self.filename = os.path.basename(self.src_path)
@@ -106,6 +111,7 @@ class FileBase(object):
 
     @property
     def size(self):
+        """Filesize in bytes as an int, 0 if file does not exist."""
         try:
             size = os.path.getsize(self.src_path)
         except FileNotFoundError:
@@ -114,7 +120,7 @@ class FileBase(object):
 
     @property
     def has_mimetype(self):
-        """Returns True if file has a full mimetype, else False."""
+        """True if file has a main and sub mimetype, else False."""
         # TODO: broken mimetype checks should be done somewhere else.
         # Should the check be by default or should we let the API consumer write it?
         if not self.main_type or not self.sub_type:
@@ -124,7 +130,7 @@ class FileBase(object):
 
     @property
     def has_extension(self):
-        """Returns True if self.extension is set, else False."""
+        """True if self.extension is set, else False."""
         if self.extension is None:
             return False
         else:
@@ -132,35 +138,42 @@ class FileBase(object):
 
     @property
     def is_dangerous(self):
-        """True if file has been marked 'dangerous' else False."""
+        """True if file has been marked 'dangerous', else False."""
         return self._file_props['safety_category'] is 'dangerous'
 
     @property
     def is_unknown(self):
-        """True if file has been marked 'unknown' else False."""
+        """True if file has been marked 'unknown', else False."""
         return self._file_props['safety_category'] is 'unknown'
 
     @property
     def is_binary(self):
-        """True if file has been marked 'binary' else False."""
+        """True if file has been marked 'binary', else False."""
         return self._file_props['safety_category'] is 'binary'
 
     @property
     def is_symlink(self):
-        """Returns True and updates log if file is a symlink."""
+        """True  if file is a symlink, else False."""
         if self._file_props['symlink'] is False:
             return False
         else:
             return True
 
     def set_property(self, prop_string, value):
-        """Takes a property + a value and adds them to self._file_props."""
+        """
+        Take a property and a value and add them to self._file_props.
+
+        If prop_string is already in _file_props, set prop_string to value.
+        If prop_string not in _file_props, set prop_string to value in
+        _file_props['user_defined'].
+        """
         if prop_string in self._file_props.keys():
             self._file_props[prop_string] = value
         else:
             self._file_props['user_defined'][prop_string] = value
 
     def get_property(self, file_prop):
+        """Get the value for a property in _file_props."""
         # TODO: could probably be refactored
         if file_prop in self._file_props:
             return self._file_props[file_prop]
@@ -170,16 +183,18 @@ class FileBase(object):
             return None
 
     def add_error(self, error, info):
+        """Add an error: info pair to _file_props['errors']."""
         self._file_props['errors'].update({error: info})
 
     def add_file_string(self, file_string):
+        """Add a file descriptor string to _file_props."""
         self._file_props['file_string_set'].add(file_string)
 
     def make_dangerous(self, reason_string=None):
         """
-        Marks a file as dangerous.
+        Mark file as dangerous.
 
-        Prepends and appends DANGEROUS to the destination file name
+        Prepend and append DANGEROUS to the destination file name
         to help prevent double-click of death.
         """
         if self.is_dangerous:
@@ -190,7 +205,7 @@ class FileBase(object):
         self.dst_path = os.path.join(path, 'DANGEROUS_{}_DANGEROUS'.format(filename))
 
     def make_unknown(self):
-        """Marks a file as an unknown type and prepends UNKNOWN to filename."""
+        """Mark file as an unknown type and prepend UNKNOWN to filename."""
         if self.is_dangerous or self.is_binary:
             return
         self.set_property('safety_category', 'unknown')
@@ -198,7 +213,7 @@ class FileBase(object):
         self.dst_path = os.path.join(path, 'UNKNOWN_{}'.format(filename))
 
     def make_binary(self):
-        """Marks a file as a binary and appends .bin to filename."""
+        """Mark file as a binary and append .bin to filename."""
         if self.is_dangerous:
             return
         self.set_property('safety_category', 'binary')
@@ -206,7 +221,7 @@ class FileBase(object):
         self.dst_path = os.path.join(path, '{}.bin'.format(filename))
 
     def safe_copy(self, src=None, dst=None):
-        """Copy a file and create directory if needed."""
+        """Copy file and create destination directories if needed."""
         if src is None:
             src = self.src_path
         if dst is None:
@@ -220,7 +235,7 @@ class FileBase(object):
             self.add_error(e, '')
 
     def force_ext(self, ext):
-        """If dst_path does not end in ext, changes it and edits _file_props."""
+        """If dst_path does not end in ext, change it and edit _file_props."""
         if not self.dst_path.endswith(ext):
             self.set_property('force_ext', True)
             self.dst_path += ext
@@ -228,7 +243,7 @@ class FileBase(object):
             self.set_property('extension', ext)
 
     def create_metadata_file(self, ext):
-        """Create a separate file to hold this file's metadata."""
+        """Create a separate file to hold metadata from this file."""
         try:
             # make sure we aren't overwriting anything
             if os.path.exists(self.src_path + ext):
@@ -247,7 +262,7 @@ class FileBase(object):
             return False
 
     def write_log(self):
-        """Print the logs related to the current file being processed."""
+        """Write logs from file to self.logger."""
         file_log = self.logger.add_file(self)
         file_log.fields(**self._file_props)
 
@@ -273,7 +288,7 @@ class GroomerLogger(object):
             self.log_debug_out = os.devnull
 
     def tree(self, base_dir, padding='   '):
-        """Writes a graphical tree to the log for a given directory."""
+        """Write a graphical tree to the log for `base_dir`."""
         with open(self.log_content, 'ab') as lf:
             lf.write(bytes('#' * 80 + '\n', 'UTF-8'))
             lf.write(bytes('{}+- {}/\n'.format(padding, os.path.basename(os.path.abspath(base_dir)).encode()), 'utf8'))
@@ -289,7 +304,7 @@ class GroomerLogger(object):
                     lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)).encode(errors='ignore'))
 
     def _computehash(self, path):
-        """Returns a sha256 hash of a file at a given path."""
+        """Return a sha256 hash of a file at a given path."""
         s = hashlib.sha256()
         with open(path, 'rb') as f:
             while True:
@@ -300,6 +315,7 @@ class GroomerLogger(object):
         return s.hexdigest()
 
     def add_file(self, file):
+        """Add a file to the log."""
         return self.log.name('file.src_path')
 
 
@@ -340,9 +356,7 @@ class KittenGroomerBase(object):
 
     # TODO: feels like this function doesn't need to exist if we move main()
     def processdir(self, src_dir, dst_dir):
-        """
-        Implement this function in your subclass to define file processing behavior.
-        """
+        """Implement this function to define file processing behavior."""
         raise ImplementationRequired('Please implement processdir.')
 
 

From 1abfb432b1170051c47468bb2c0b3209f7a17ce2 Mon Sep 17 00:00:00 2001
From: Dan Puttick <dputtick@users.noreply.github.com>
Date: Wed, 15 Mar 2017 22:53:14 -0400
Subject: [PATCH 36/36] Edit README.md

* Mention that example files are not up to date with the new API changes
* Update example code for API changes
---
 README.md | 98 +++++++++++++++++++++++--------------------------------
 setup.py  |  2 +-
 2 files changed, 42 insertions(+), 58 deletions(-)

diff --git a/README.md b/README.md
index f0a0cf3..0f368ab 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
 PyCIRCLean is the core Python code used by [CIRCLean](https://github.com/CIRCL/Circlean/), an open-source
 USB key and document sanitizer created by [CIRCL](https://www.circl.lu/). This module has been separated from the
 device-specific scripts and can be used for dedicated security applications to sanitize documents from hostile environments
-to trusted environments. PyCIRCLean is currently Python 3.3+ only.
+to trusted environments. PyCIRCLean is currently Python 3.3+ compatible.
 
 # Installation
 
@@ -23,10 +23,12 @@ pip install .
 # How to use PyCIRCLean
 
 PyCIRCLean is a simple Python library to handle file checking and sanitization.
-PyCIRCLean is designed to be overloaded and extended to cover specific checking
+PyCIRCLean is designed to be extended to cover specific checking
 and sanitization workflows in different organizations such as industrial
 environments or restricted/classified ICT environments. A series of practical examples utilizing PyCIRCLean can be found
-in the [./examples](./examples) directory.
+in the [./examples](./examples) directory. Note: for commits beyond version 2.2.0 these
+examples are not guaranteed to work with the PyCIRCLean API. Please check [helpers.py](./kittengroomer/helpers.py) or
+[filecheck.py](./bin/filecheck.py) to see the new API interface.
 
 The following simple example using PyCIRCLean will only copy files with a .conf extension matching the 'text/plain' MIME
 type. If any other file is found in the source directory, the files won't be copied to the destination directory.
@@ -41,87 +43,69 @@ from kittengroomer import FileBase, KittenGroomerBase, main
 
 
 # Extension
-configfiles = {'.conf': 'text/plain'}
+class Config:
+    configfiles = {'.conf': 'text/plain'}
 
 
 class FileSpec(FileBase):
 
     def __init__(self, src_path, dst_path):
-        ''' Init file object, set the extension '''
+        """Init file object, set the extension."""
         super(FileSpec, self).__init__(src_path, dst_path)
+        self.valid_files = {}
         a, self.extension = os.path.splitext(self.src_path)
         self.mimetype = magic.from_file(self.src_path, mime=True).decode("utf-8")
+        # The initial version will only accept the file extensions/mimetypes listed here.
+        self.valid_files.update(Config.configfiles)
+
+    def check(self):
+        valid = True
+        expected_mime = self.valid_files.get(self.extension)
+        if expected_mime is None:
+            # Unexpected extension => disallowed
+            valid = False
+            compare_ext = 'Extension: {} - Expected: {}'.format(self.cur_file.extension, ', '.join(self.valid_files.keys()))
+        elif self.mimetype != expected_mime:
+            # Unexpected mimetype => disallowed
+            valid = False
+            compare_mime = 'Mime: {} - Expected: {}'.format(self.cur_file.mimetype, expected_mime)
+        self.add_log_details('valid', valid)
+        if valid:
+            self.cur_file.log_string = 'Extension: {} - MimeType: {}'.format(self.cur_file.extension, self.cur_file.mimetype)
+        else:
+            self.should_copy = False
+            if compare_ext is not None:
+                self.add_log_string(compare_ext)
+            else:
+                self.add_log_string(compare_mime)
+        if self.should_copy:
+            self.safe_copy()
+        self.write_log()
 
 
 class KittenGroomerSpec(KittenGroomerBase):
 
     def __init__(self, root_src=None, root_dst=None):
-        '''
-            Initialize the basics of the copy
-        '''
+        """Initialize the basics of the copy."""
         if root_src is None:
             root_src = os.path.join(os.sep, 'media', 'src')
         if root_dst is None:
             root_dst = os.path.join(os.sep, 'media', 'dst')
         super(KittenGroomerSpec, self).__init__(root_src, root_dst)
-        self.valid_files = {}
-
-        # The initial version will only accept the file extensions/mimetypes listed here.
-        self.valid_files.update(configfiles)
-
-    def _print_log(self):
-        '''
-            Print the logs related to the current file being processed
-        '''
-        tmp_log = self.log_name.fields(**self.cur_file.log_details)
-        if not self.cur_file.log_details.get('valid'):
-            tmp_log.warning(self.cur_file.log_string)
-        else:
-            tmp_log.debug(self.cur_file.log_string)
 
     def processdir(self):
-        '''
-            Main function doing the processing
-        '''
+        """Main function doing the processing."""
         to_copy = []
         error = []
         for srcpath in self._list_all_files(self.src_root_dir):
-            valid = True
-            self.log_name.info('Processing {}', srcpath.replace(self.src_root_dir + '/', ''))
-            self.cur_file = FileSpec(srcpath, srcpath.replace(self.src_root_dir, self.dst_root_dir))
-            expected_mime = self.valid_files.get(self.cur_file.extension)
-            if expected_mime is None:
-                # Unexpected extension => disallowed
-                valid = False
-                compare_ext = 'Extension: {} - Expected: {}'.format(self.cur_file.extension, ', '.join(self.valid_files.keys()))
-            elif self.cur_file.mimetype != expected_mime:
-                # Unexpected mimetype => disallowed
-                valid = False
-                compare_mime = 'Mime: {} - Expected: {}'.format(self.cur_file.mimetype, expected_mime)
-            self.cur_file.add_log_details('valid', valid)
-            if valid:
-                to_copy.append(self.cur_file)
-                self.cur_file.log_string = 'Extension: {} - MimeType: {}'.format(self.cur_file.extension, self.cur_file.mimetype)
-            else:
-                error.append(self.cur_file)
-                if compare_ext is not None:
-                    self.cur_file.log_string = compare_ext
-                else:
-                    self.cur_file.log_string = compare_mime
-        if len(error) > 0:
-            for f in error + to_copy:
-                self.cur_file = f
-                self._print_log()
-        else:
-            for f in to_copy:
-                self.cur_file = f
-                self._safe_copy()
-                self._print_log()
+            dstpath = srcpath.replace(self.src_root_dir, self.dst_root_dir)
+            cur_file = FileSpec(srcpath, dstpath)
+            cur_file.check()
 
 
 if __name__ == '__main__':
     main(KittenGroomerSpec, ' Only copy some files, returns an error is anything else is found')
-    exit(0)
+
 ~~~
 
 # How to contribute
diff --git a/setup.py b/setup.py
index c11f64d..4397c5f 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup
 
 setup(
     name='kittengroomer',
-    version='2.1',
+    version='2.1.0',
     author='Raphaël Vinot',
     author_email='raphael.vinot@circl.lu',
     maintainer='Raphaël Vinot',