Merge branch 'master' of github.com:CIRCL/PyCIRCLean

2017-03-10 14:45:11 +01:00 · 2017-03-10 14:45:11 +01:00 · 8a186bfd26
parent b0224d781a 515687b1fe
commit 8a186bfd26
40 changed files with 374 additions and 545 deletions
--- a/.gitignore
+++ b/.gitignore
@ -67,8 +67,8 @@ target/
 *.vrb
 # Project specific
-/tests/dst/*
+tests/dst/*
-!/tests/logs/
+tests/test_logs/*
-!/tests/.keepdir
+!tests/**/.keepdir
-
+!tests/src_invalid/*
-
+!tests/src_valid/*
--- a/.travis.yml
+++ b/.travis.yml
@ -1,7 +1,6 @@
 language: python
 python:
    - 2.7
    - 3.3
    - 3.4
    - 3.5
@ -17,8 +16,6 @@ addons:
        packages:
            # General dependencies
            - p7zip-full
            # generic.py dependencies
            - ghostscript
            # Testing dependencies
            - mercurial
@ -26,45 +23,28 @@ install:
    # General dependencies
    - sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu/ trusty multiverse" && sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu/ trusty-updates multiverse"
    - sudo apt-get update -qq
-    - sudo apt-get install -y p7zip-rar
+    - sudo apt-get install -y p7zip-rar python-pip
    # generic.py: pdf2htmlEX + dependencies
    - sudo add-apt-repository ppa:fontforge/fontforge --yes
    # to get a working 0.26 poppler
    - sudo add-apt-repository ppa:delayargentina/delayx --yes
    - sudo apt-get update -qq
    - sudo apt-get install -y libpoppler-dev libpoppler-private-dev libspiro-dev libcairo-dev libpango1.0-dev libfreetype6-dev libltdl-dev libfontforge-dev python-imaging python-pip firefox xvfb
    - git clone https://github.com/coolwanglu/pdf2htmlEX.git
    - pushd pdf2htmlEX
    - cmake -DCMAKE_INSTALL_PREFIX:PATH=/usr -DENABLE_SVG=ON .
    - make
    - sudo make install
    - popd
    # generic.py: Other dependencies
    - sudo apt-get install -y libreoffice libreoffice-script-provider-python unoconv
    # filecheck.py dependencies
    - sudo apt-get install libxml2-dev libxslt1-dev
    - wget https://didierstevens.com/files/software/pdfid_v0_2_1.zip
    - unzip pdfid_v0_2_1.zip
    - pip install -U pip
-    - pip install lxml exifread pillow
+    - pip install lxml exifread pillow olefile
-    - pip install git+https://github.com/Rafiot/officedissector.git
+    - pip install git+https://github.com/decalage2/oletools.git
-    - |
+    - pip install git+https://github.com/grierforensics/officedissector.git
-      if [[ "$TRAVIS_PYTHON_VERSION" == 2* ]]; then
+    # PyCIRCLean dependencies
        pip install -U oletools olefile
      fi
    # Module dependencies
    - pip install -r dev-requirements.txt
    - pip install coveralls codecov
    # Testing dependencies
    - sudo apt-get install rar
    # Prepare tests
-    # Zoo
+    # Malware from theZoo
    - git clone https://github.com/Rafiot/theZoo.git
    - pushd theZoo/malwares/Binaries
    - python unpackall.py
    - popd
-    - mv theZoo/malwares/Binaries/out tests/src_complex/
+    - mv theZoo/malwares/Binaries/out tests/src_invalid/
-    # Path traversal
+    # Path traversal attacks
    - git clone https://github.com/jwilk/path-traversal-samples
    - pushd path-traversal-samples
    - pushd zip
@ -74,25 +54,25 @@ install:
    - make
    - popd
    - popd
-    - mv path-traversal-samples/zip/*.zip tests/src_complex/
+    - mv path-traversal-samples/zip/*.zip tests/src_invalid/
-    - mv path-traversal-samples/rar/*.rar tests/src_complex/
+    - mv path-traversal-samples/rar/*.rar tests/src_invalid/
    # Office docs
    - git clone https://github.com/eea/odfpy.git
-    - mv odfpy/tests/examples/* tests/src_complex/
+    - mv odfpy/tests/examples/* tests/src_invalid/
-    - pushd tests/src_complex/
+    - pushd tests/src_invalid/
    - wget https://bitbucket.org/decalage/olefileio_pl/raw/3073963b640935134ed0da34906fea8e506460be/Tests/images/test-ole-file.doc
    - wget --no-check-certificate https://www.officedissector.com/corpus/fraunhoferlibrary.zip
    - unzip -o fraunhoferlibrary.zip
    - rm fraunhoferlibrary.zip
-    - 7z x 42.zip -p42
+    - 7z x -p42 42.zip
    # Some random samples
    - wget http://www.sample-videos.com/audio/mp3/india-national-anthem.mp3
    - wget http://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4
    - wget http://thewalter.net/stef/software/rtfx/sample.rtf
    - echo "blah" > test.obj
    - popd
 script:
-    - travis_wait 60 py.test --cov=kittengroomer --cov=bin tests/
+    - travis_wait 30 py.test --cov=kittengroomer --cov=bin tests/
 notifications:
    email:
--- a/9
+++ b/9
@ -1,9 +0,0 @@
 Changelog
 =========
 2.1.0
 ---
 New features:
 Fixes:
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -0,0 +1,19 @@
 Changelog
 =========
 2.1.0
 ---
 New features:
 - Dropped Python 2.7 support: PyCIRCLean is now Python 3.3+ only
 - Tests are now easier to write and run: we have support for pytest and tox!
 - More documentation: both docstrings and more detailed readmes
 - Added more types of examples for testing
 - The Travis build now runs in ~10 minutes vs. ~30 minutes before
 Fixes:
 - Extension matching now catches lower/upper case errors
 - Fixed remaining python 3 issues with filecheck.py
 - Fixed support for .rtf files
 - Many other small filetype related fixes
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -29,5 +29,13 @@ or if you have an example you'd like to contribute.
 Running the tests
 =================
-* Running the tests is easy. First, make sure you've installed the project and testing dependencies.
+* Running the tests is fairly straightforward.
-Then, run `python -m pytest` or just `pytest` in the top level or /tests directory.
+* First, make sure you've installed the project and testing dependencies.
 * Then, run `python -m pytest` or just `pytest` in the top level directory of the module.
 * Each integration test that runs will generate a timestamped copy of the log for that run
 in the tests/test_logs directory.
 * If you'd like to get information about code coverage, run the tests using
 `pytest --cov=kittengroomer`.
 * You can test with multiple versions of Python if you have them installed
 by running `pip install tox` and then `tox`. Make sure you modify "envlist"
 in tox.ini for the Python versions you plan to use.
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -1 +1 @@
-include kittengroomer/data/* README.md CONTRIBUTING.md CHANGELOG dev-requirements.txt
+include README.md CONTRIBUTING.md CHANGELOG dev-requirements.txt
--- a/README.md
+++ b/README.md
@ -7,7 +7,7 @@
 PyCIRCLean is the core Python code used by [CIRCLean](https://github.com/CIRCL/Circlean/), an open-source
 USB key and document sanitizer created by [CIRCL](https://www.circl.lu/). This module has been separated from the 
 device-specific scripts and can be used for dedicated security applications to sanitize documents from hostile environments 
-to trusted environments.
+to trusted environments. PyCIRCLean is currently Python 3.3+ only.
 # Installation
@ -26,7 +26,7 @@ pip install .
 PyCIRCLean is a simple Python library to handle file checking and sanitization. PyCIRCLean is designed as a simple library
 that can be overloaded to cover specific checking and sanitization workflows in different organizations like industrial
 environments or restricted/classified ICT environments. A series of practical examples utilizing PyCIRCLean can be found
-in the [./bin](./bin) directory.
+in the [./examples](./examples) directory.
 The following simple example using PyCIRCLean will only copy files with a .conf extension matching the 'text/plain' MIME
 type. If any other file is found in the source directory, the files won't be copied to the destination directory.
--- a/bin/README.md
+++ b/bin/README.md
@ -1,70 +1,33 @@
 Example scripts
 ===============
 These are a series of example scripts designed to demonstrate PyCIRCLean's capabilities. Feel free to
 adapt or modify any of them to suit your requirements. In order to use any of these scripts, you will need to
 install the PyCIRCLean dependencies (preferably in a virtualenv):
 ```
    pip install git+https://github.com/ahupp/python-magic.git   # we cannot use the PyPi package for now due to a bug
    python setup.py install     # from the root of the repository
 ```
 Requirements per script
 =======================
 filecheck.py
------------
+============
-*WARNING*: Only works with Python 2.7 (oletools and olefile aren't ported to Python3 for now)
+This is the script used by the [CIRCLean](https://github.com/CIRCL/Circlean)
 USB key sanitizer. It is designed to handle a range of file types, and will
 mark them as dangerous if they meet certain criteria.
-Requirements by type of document:
+Before installing the filecheck.py dependencies, make sure to install the PyCIRCLean
 dependencies:
 ```
    pip install .
 ```
 Dependencies by type of document:
 * Microsoft office: oletools, olefile
 * OOXML: officedissector
 * PDF: pdfid
 * Archives: p7zip-full, p7zip-rar
 * Metadata: exifread
 * Images: pillow
 Note: pdfid is not installable with pip. It must be downloaded and installed
 manually in the directory where filecheck will be run.
 ```
    sudo apt-get install p7zip-full p7zip-rar libxml2-dev libxslt1-dev
-    pip install lxml officedissector git+https://github.com/ahupp/python-magic.git oletools olefile
+    pip install lxml oletools olefile pillow exifread
    pip install git+https://github.com/Rafiot/officedissector.git
-    # pdfid is not a package, installing manually
+    # installing pdfid manually
    wget https://didierstevens.com/files/software/pdfid_v0_2_1.zip
    unzip pdfid_v0_2_1.zip
    python setup.py -q install
 ```
 generic.py
 ----------
 Requirements by type of document:
 * Office and all text files: unoconv, libreoffice
 * PDF: ghostscript, pdf2htmlEX
 ```
    # required for pdf2htmlEX
    sudo add-apt-repository ppa:fontforge/fontforge --yes
    sudo add-apt-repository ppa:coolwanglu/pdf2htmlex --yes
    sudo apt-get update -qq
    sudo apt-get install -qq libpoppler-dev libpoppler-private-dev libspiro-dev libcairo-dev libpango1.0-dev libfreetype6-dev libltdl-dev libfontforge-dev python-imaging python-pip firefox xvfb
    # install pdf2htmlEX
    git clone https://github.com/coolwanglu/pdf2htmlEX.git
    pushd pdf2htmlEX
    cmake -DCMAKE_INSTALL_PREFIX:PATH=/usr -DENABLE_SVG=ON .
    make
    sudo make install
    popd
    # Installing the rest
    sudo apt-get install ghostscript p7zip-full p7zip-rar libreoffice unoconv
 ```
 pier9.py
 --------
 No external dependencies required.
 specific.py
 -----------
 No external dependencies required.
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@ -1,11 +1,9 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import os
 import sys
 import mimetypes
 import shlex
 import subprocess
 import time
 import zipfile
 import oletools.oleid
@ -21,8 +19,7 @@ from pdfid import PDFiD, cPDFiD
 from kittengroomer import FileBase, KittenGroomerBase, main
-SEVENZ = '/usr/bin/7z'
+SEVENZ_PATH = '/usr/bin/7z'
 PY3 = sys.version_info.major == 3
 # Prepare application/<subtype>
@ -41,7 +38,7 @@ mimes_data = ['octet-stream']
 mimes_exif = ['image/jpeg', 'image/tiff']
 mimes_png = ['image/png']
-# Mime types we can pull metadata from
+# Mimetypes we can pull metadata from
 mimes_metadata = ['image/jpeg', 'image/tiff', 'image/png']
 # Aliases
@ -62,7 +59,7 @@ propertype = {'.gz': 'application/gzip'}
 # Commonly used malicious extensions
 # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/
 # https://github.com/wiregit/wirecode/blob/master/components/core-settings/src/main/java/org/limewire/core/settings/FilterSettings.java
-mal_ext = (
+MAL_EXTS = (
    # Applications
    ".exe", ".pif", ".application", ".gadget", ".msi", ".msp", ".com", ".scr",
    ".hta", ".cpl", ".msc", ".jar",
@ -86,55 +83,58 @@ mal_ext = (
 class File(FileBase):
    def __init__(self, src_path, dst_path):
        ''' Init file object, set the mimetype '''
        super(File, self).__init__(src_path, dst_path)
        self.is_recursive = False
-        if not self.has_mimetype():
+        self._check_dangerous()
            # No mimetype, should not happen.
            self.make_dangerous()
        if not self.has_extension():
            self.make_dangerous()
        if self.extension in mal_ext:
            self.log_details.update({'malicious_extension': self.extension})
            self.make_dangerous()
        if self.is_dangerous():
            return
        self.log_details.update({'maintype': self.main_type,
                                 'subtype': self.sub_type,
                                 'extension': self.extension})
        self._check_extension()
        self._check_mime()
-        # Check correlation known extension => actual mime type
+    def _check_dangerous(self):
        if not self.has_mimetype():
            # No mimetype, should not happen.
            self.make_dangerous()
        if not self.has_extension():
            self.make_dangerous()
        if self.extension in MAL_EXTS:
            self.log_details.update({'malicious_extension': self.extension})
            self.make_dangerous()
    def _check_extension(self):
        """Guesses the file's mimetype based on its extension. If the file's
        mimetype (as determined by libmagic) is contained in the mimetype
        module's list of valid mimetypes and the expected mimetype based on its
        extension differs from the mimetype determined by libmagic, then it
        marks the file as dangerous."""
        if propertype.get(self.extension) is not None:
            expected_mimetype = propertype.get(self.extension)
        else:
            expected_mimetype, encoding = mimetypes.guess_type(self.src_path, strict=False)
            if aliases.get(expected_mimetype) is not None:
                expected_mimetype = aliases.get(expected_mimetype)
        is_known_extension = self.extension in mimetypes.types_map.keys()
        if is_known_extension and expected_mimetype != self.mimetype:
            self.log_details.update({'expected_mimetype': expected_mimetype})
            self.make_dangerous()
-        # check correlation actual mime type => known extensions
+    def _check_mime(self):
        """Takes the mimetype (as determined by libmagic) and determines
        whether the list of extensions that are normally associated with
        that extension contains the file's actual extension."""
        if aliases.get(self.mimetype) is not None:
            mimetype = aliases.get(self.mimetype)
        else:
            mimetype = self.mimetype
        expected_extensions = mimetypes.guess_all_extensions(mimetype, strict=False)
        if expected_extensions:
            if len(self.extension) > 0 and self.extension not in expected_extensions:
                self.log_details.update({'expected_extensions': expected_extensions})
                self.make_dangerous()
        else:
            # there are no known extensions associated to this mimetype.
            pass
    def has_metadata(self):
        if self.mimetype in mimes_metadata:
@ -144,18 +144,14 @@ class File(FileBase):
 class KittenGroomerFileCheck(KittenGroomerBase):
-    def __init__(self, root_src=None, root_dst=None, max_recursive=2, debug=False):
+    def __init__(self, root_src=None, root_dst=None, max_recursive_depth=2, debug=False):
        '''
            Initialize the basics of the conversion process
        '''
        if root_src is None:
            root_src = os.path.join(os.sep, 'media', 'src')
        if root_dst is None:
            root_dst = os.path.join(os.sep, 'media', 'dst')
        super(KittenGroomerFileCheck, self).__init__(root_src, root_dst, debug)
-
+        self.recursive_archive_depth = 0
-        self.recursive = 0
+        self.max_recursive_depth = max_recursive_depth
        self.max_recursive = max_recursive
        subtypes_apps = [
            (mimes_office, self._winoffice),
@ -189,21 +185,18 @@ class KittenGroomerFileCheck(KittenGroomerBase):
            'inode': self.inode,
        }
-    # ##### Helpers #####
+    # ##### Helper functions #####
    def _init_subtypes_application(self, subtypes_application):
-        '''
+        """Creates a dictionary with the right method based on the sub mime type."""
-            Create the Dict to pick the right function based on the sub mime type
+        subtype_dict = {}
-        '''
+        for list_subtypes, func in subtypes_application:
        to_return = {}
        for list_subtypes, fct in subtypes_application:
            for st in list_subtypes:
-                to_return[st] = fct
+                subtype_dict[st] = func
-        return to_return
+        return subtype_dict
    def _print_log(self):
-        '''
+        """Print the logs related to the current file being processed."""
-            Print the logs related to the current file being processed
+        # TODO: change name to _write_log
        '''
        tmp_log = self.log_name.fields(**self.cur_file.log_details)
        if self.cur_file.is_dangerous():
            tmp_log.warning(self.cur_file.log_string)
@ -212,66 +205,53 @@ class KittenGroomerFileCheck(KittenGroomerBase):
        else:
            tmp_log.debug(self.cur_file.log_string)
-    def _run_process(self, command_line, timeout=0, background=False):
+    def _run_process(self, command_string, timeout=None):
-        '''Run subprocess, wait until it finishes'''
+        """Run command_string in a subprocess, wait until it finishes."""
-        if timeout != 0:
+        args = shlex.split(command_string)
            deadline = time.time() + timeout
        else:
            deadline = None
        args = shlex.split(command_line)
        with open(self.log_debug_err, 'ab') as stderr, open(self.log_debug_out, 'ab') as stdout:
-            p = subprocess.Popen(args, stdout=stdout, stderr=stderr)
+            try:
-        if background:
+                subprocess.check_call(args, stdout=stdout, stderr=stderr, timeout=timeout)
-            # This timer is here to make sure the unoconv listener is properly started.
+            except (subprocess.TimeoutExpired, subprocess.CalledProcessError):
-            time.sleep(10)
+                return
            return True
        while True:
            code = p.poll()
            if code is not None:
                break
            if deadline is not None and time.time() > deadline:
                p.kill()
                break
            time.sleep(1)
        return True
    #######################
-
+    # ##### Discarded mimetypes, reason in the docstring ######
    # ##### Discarded mime types, reason in the comments ######
    def inode(self):
-        ''' Usually empty file. No reason (?) to copy it on the dest key'''
+        """Empty file or symlink."""
        if self.cur_file.is_symlink():
-            self.cur_file.log_string += 'Symlink to {}'.format(self.log_details['symlink'])
+            self.cur_file.log_string += 'Symlink to {}'.format(self.cur_file.log_details['symlink'])
        else:
            self.cur_file.log_string += 'Inode file'
    def unknown(self):
-        ''' This main type is unknown, that should not happen '''
+        """Main type should never be unknown."""
        self.cur_file.log_string += 'Unknown file'
    def example(self):
-        '''Used in examples, should never be returned by libmagic'''
+        """Used in examples, should never be returned by libmagic."""
        self.cur_file.log_string += 'Example file'
    def multipart(self):
-        '''Used in web apps, should never be returned by libmagic'''
+        """Used in web apps, should never be returned by libmagic"""
        self.cur_file.log_string += 'Multipart file'
-    # ##### Threated as malicious, no reason to have it on a USB key ######
+    # ##### Treated as malicious, no reason to have it on a USB key ######
    def message(self):
-        '''Way to process message file'''
+        """Process a message file."""
        self.cur_file.log_string += 'Message file'
        self.cur_file.make_dangerous()
        self._safe_copy()
    def model(self):
-        '''Way to process model file'''
+        """Process a model file."""
        self.cur_file.log_string += 'Model file'
        self.cur_file.make_dangerous()
        self._safe_copy()
-    # ##### Converted ######
+    # ##### Files that will be converted ######
    def text(self):
        """Process an rtf, ooxml, or plaintext file."""
        for r in mimes_rtf:
            if r in self.cur_file.sub_type:
                self.cur_file.log_string += 'Rich Text file'
@ -289,7 +269,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
        self._safe_copy()
    def application(self):
-        ''' Everything can be there, using the subtype to decide '''
+        """Processes an application specific file according to its subtype."""
        for subtype, fct in self.subtypes_application.items():
            if subtype in self.cur_file.sub_type:
                fct()
@ -299,12 +279,13 @@ class KittenGroomerFileCheck(KittenGroomerBase):
        self._unknown_app()
    def _executables(self):
-        '''Way to process executable file'''
+        """Processes an executable file."""
        self.cur_file.add_log_details('processing_type', 'executable')
        self.cur_file.make_dangerous()
        self._safe_copy()
    def _winoffice(self):
        """Processes a winoffice file using olefile/oletools."""
        self.cur_file.add_log_details('processing_type', 'WinOffice')
        # Try as if it is a valid document
        oid = oletools.oleid.OleID(self.cur_file.src_path)
@ -343,6 +324,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
        self._safe_copy()
    def _ooxml(self):
        """Processes an ooxml file."""
        self.cur_file.add_log_details('processing_type', 'ooxml')
        try:
            doc = officedissector.doc.Document(self.cur_file.src_path)
@ -369,6 +351,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
        self._safe_copy()
    def _libreoffice(self):
        """Processes a libreoffice file."""
        self.cur_file.add_log_details('processing_type', 'libreoffice')
        # As long as there is no way to do a sanity check on the files => dangerous
        try:
@ -385,55 +368,69 @@ class KittenGroomerFileCheck(KittenGroomerBase):
        self._safe_copy()
    def _pdf(self):
-        '''Way to process PDF file'''
+        """Processes a PDF file."""
        self.cur_file.add_log_details('processing_type', 'pdf')
        xmlDoc = PDFiD(self.cur_file.src_path)
        oPDFiD = cPDFiD(xmlDoc, True)
        # TODO: other keywords?
-        if oPDFiD.encrypt > 0:
+        if oPDFiD.encrypt.count > 0:
            self.cur_file.add_log_details('encrypted', True)
            self.cur_file.make_dangerous()
-        if oPDFiD.js > 0 or oPDFiD.javascript > 0:
+        if oPDFiD.js.count > 0 or oPDFiD.javascript.count > 0:
            self.cur_file.add_log_details('javascript', True)
            self.cur_file.make_dangerous()
-        if oPDFiD.aa > 0 or oPDFiD.openaction > 0:
+        if oPDFiD.aa.count > 0 or oPDFiD.openaction.count > 0:
            self.cur_file.add_log_details('openaction', True)
            self.cur_file.make_dangerous()
-        if oPDFiD.richmedia > 0:
+        if oPDFiD.richmedia.count > 0:
            self.cur_file.add_log_details('flash', True)
            self.cur_file.make_dangerous()
-        if oPDFiD.launch > 0:
+        if oPDFiD.launch.count > 0:
            self.cur_file.add_log_details('launch', True)
            self.cur_file.make_dangerous()
    def _archive(self):
-        '''Way to process Archive'''
+        """Processes an archive using 7zip. The archive is extracted to a
        temporary directory and self.processdir is called on that directory.
        The recursive archive depth is increased to protect against archive
        bombs."""
        self.cur_file.add_log_details('processing_type', 'archive')
        self.cur_file.is_recursive = True
        self.cur_file.log_string += 'Archive extracted, processing content.'
        tmpdir = self.cur_file.dst_path + '_temp'
        self._safe_mkdir(tmpdir)
-        extract_command = '{} -p1 x "{}" -o"{}" -bd -aoa'.format(SEVENZ, self.cur_file.src_path, tmpdir)
+        extract_command = '{} -p1 x "{}" -o"{}" -bd -aoa'.format(SEVENZ_PATH, self.cur_file.src_path, tmpdir)
        self._run_process(extract_command)
-        self.recursive += 1
+        self.recursive_archive_depth += 1
        self.tree(tmpdir)
        self.processdir(tmpdir, self.cur_file.dst_path)
-        self.recursive -= 1
+        self.recursive_archive_depth -= 1
        self._safe_rmtree(tmpdir)
    def _handle_archivebomb(self, src_dir):
        self.cur_file.make_dangerous()
        self.cur_file.add_log_details('Archive Bomb', True)
        self.log_name.warning('ARCHIVE BOMB.')
        self.log_name.warning('The content of the archive contains recursively other archives.')
        self.log_name.warning('This is a bad sign so the archive is not extracted to the destination key.')
        self._safe_rmtree(src_dir)
        if src_dir.endswith('_temp'):
            bomb_path = src_dir[:-len('_temp')]
            self._safe_remove(bomb_path)
    def _unknown_app(self):
-        '''Way to process an unknown file'''
+        """Processes an unknown file."""
        self.cur_file.make_unknown()
        self._safe_copy()
    def _binary_app(self):
-        '''Way to process an unknown binary file'''
+        """Processes an unknown binary file."""
        self.cur_file.make_binary()
        self._safe_copy()
    #######################
    # Metadata extractors
-    def _metadata_exif(self, metadataFile):
+    def _metadata_exif(self, metadata_file):
        img = open(self.cur_file.src_path, 'rb')
        tags = None
@ -459,11 +456,11 @@ class KittenGroomerFileCheck(KittenGroomerBase):
                # Exifreader truncates data.
                if len(printable) > 25 and printable.endswith(", ... ]"):
                    value = tags[tag].values
-                    if isinstance(value, basestring):
+                    if isinstance(value, str):
                        printable = value
                    else:
                        printable = str(value)
-                metadataFile.write("Key: {}\tValue: {}\n".format(tag, printable))
+                metadata_file.write("Key: {}\tValue: {}\n".format(tag, printable))
        self.cur_file.add_log_details('metadata', 'exif')
        img.close()
        return True
@ -487,22 +484,36 @@ class KittenGroomerFileCheck(KittenGroomerBase):
            return False
    def extract_metadata(self):
-        metadataFile = self._safe_metadata_split(".metadata.txt")
+        metadata_file = self._safe_metadata_split(".metadata.txt")
-        success = self.metadata_processing_options.get(self.cur_file.mimetype)(metadataFile)
+        success = self.metadata_processing_options.get(self.cur_file.mimetype)(metadata_file)
-        metadataFile.close()
+        metadata_file.close()
        if not success:
            # FIXME Delete empty metadata file
            pass
    #######################
-    # ##### Not converted, checking the mime type ######
+    # ##### Media - audio and video aren't converted ######
    def audio(self):
-        '''Way to process an audio file'''
+        """Processes an audio file."""
        self.cur_file.log_string += 'Audio file'
        self._media_processing()
    def video(self):
        """Processes a video."""
        self.cur_file.log_string += 'Video file'
        self._media_processing()
    def _media_processing(self):
        """Generic way to process all media files."""
        self.cur_file.add_log_details('processing_type', 'media')
        self._safe_copy()
    def image(self):
-        '''Way to process an image'''
+        """Processes an image.
        Extracts metadata if metadata is present. Creates a temporary
        directory, opens the image using PIL.Image, saves it to the temporary
        directory, and copies it to the destination."""
        if self.cur_file.has_metadata():
            self.extract_metadata()
@ -534,52 +545,40 @@ class KittenGroomerFileCheck(KittenGroomerBase):
        self.cur_file.log_string += 'Image file'
        self.cur_file.add_log_details('processing_type', 'image')
    def video(self):
        '''Way to process a video'''
        self.cur_file.log_string += 'Video file'
        self._media_processing()
    def _media_processing(self):
        '''Generic way to process all the media files'''
        self.cur_file.add_log_details('processing_type', 'media')
        self._safe_copy()
    #######################
    def process_file(self, srcpath, dstpath, relative_path):
        self.cur_file = File(srcpath, dstpath)
        self.log_name.info('Processing {} ({}/{})',
                           relative_path,
                           self.cur_file.main_type,
                           self.cur_file.sub_type)
        if not self.cur_file.is_dangerous():
            self.mime_processing_options.get(self.cur_file.main_type, self.unknown)()
        else:
            self._safe_copy()
        if not self.cur_file.is_recursive:
            self._print_log()
    def processdir(self, src_dir=None, dst_dir=None):
-        '''
+        """Main function coordinating file processing."""
            Main function doing the processing
        '''
        if src_dir is None:
            src_dir = self.src_root_dir
        if dst_dir is None:
            dst_dir = self.dst_root_dir
-        if self.recursive > 0:
+        if self.recursive_archive_depth > 0:
            self._print_log()
-        if self.recursive >= self.max_recursive:
+        if self.recursive_archive_depth >= self.max_recursive_depth:
-            self.cur_file.make_dangerous()
+            self._handle_archivebomb(src_dir)
            self.cur_file.add_log_details('Archive Bomb', True)
            self.log_name.warning('ARCHIVE BOMB.')
            self.log_name.warning('The content of the archive contains recursively other archives.')
            self.log_name.warning('This is a bad sign so the archive is not extracted to the destination key.')
            self._safe_rmtree(src_dir)
            if src_dir.endswith('_temp'):
                archbomb_path = src_dir[:-len('_temp')]
                self._safe_remove(archbomb_path)
        for srcpath in self._list_all_files(src_dir):
-            self.cur_file = File(srcpath, srcpath.replace(src_dir, dst_dir))
+            dstpath = srcpath.replace(src_dir, dst_dir)
            relative_path = srcpath.replace(src_dir + '/', '')
            # which path do we want in the log?
            self.process_file(srcpath, dstpath, relative_path)
            self.log_name.info('Processing {} ({}/{})', srcpath.replace(src_dir + '/', ''),
                               self.cur_file.main_type, self.cur_file.sub_type)
            if not self.cur_file.is_dangerous():
                self.mime_processing_options.get(self.cur_file.main_type, self.unknown)()
            else:
                self._safe_copy()
            if not self.cur_file.is_recursive:
                self._print_log()
 if __name__ == '__main__':
-    main(KittenGroomerFileCheck, 'Generic version of the KittenGroomer. Convert and rename files.')
+    main(KittenGroomerFileCheck, 'File sanitizer used in CIRCLean. Renames potentially dangerous files.')
--- a/examples/README.md
+++ b/examples/README.md
@ -0,0 +1,56 @@
 Examples
 ========
 These are several sanitizers that demonstrate PyCIRCLean's capabilities. Feel free to
 adapt or modify any of them to suit your requirements. In order to use any of these scripts,
 you will first need to install the PyCIRCLean dependencies (preferably in a virtualenv):
 ```
    pip install .
 ```
 Requirements per script
 =======================
 generic.py
 ----------
 This is a script that was used by an older version of CIRCLean.
 Requirements by type of document:
 * Office and all text files: unoconv, libreoffice
 * PDF: ghostscript, pdf2htmlEX
 ```
    # required for pdf2htmlEX
    sudo add-apt-repository ppa:fontforge/fontforge --yes
    sudo add-apt-repository ppa:coolwanglu/pdf2htmlex --yes
    sudo apt-get update -qq
    sudo apt-get install -qq libpoppler-dev libpoppler-private-dev libspiro-dev libcairo-dev libpango1.0-dev libfreetype6-dev libltdl-dev libfontforge-dev python-imaging python-pip firefox xvfb
    # install pdf2htmlEX
    git clone https://github.com/coolwanglu/pdf2htmlEX.git
    pushd pdf2htmlEX
    cmake -DCMAKE_INSTALL_PREFIX:PATH=/usr -DENABLE_SVG=ON .
    make
    sudo make install
    popd
    # Installing the rest
    sudo apt-get install ghostscript p7zip-full p7zip-rar libreoffice unoconv
 ```
 pier9.py
 --------
 This script contains a list of file formats for various brands of industrial
 manufacturing equipment, such as 3D printers, CNC machines, etc. It only
 copies files that match these file formats.
 No external dependencies required.
 specific.py
 -----------
 As the name suggests, this script copies only specific file formats according
 to the configuration provided by the user.
 No external dependencies required.
--- a/examples/generic.py
+++ b/examples/generic.py
--- a/examples/pier9.py
+++ b/examples/pier9.py
--- a/examples/specific.py
+++ b/examples/specific.py
--- a/kittengroomer/data/PDFA_def.ps
+++ b/kittengroomer/data/PDFA_def.ps
@ -1,40 +0,0 @@
 %!
 % This is a sample prefix file for creating a PDF/A document.
 % Feel free to modify entries marked with "Customize".
 % This assumes an ICC profile to reside in the file (srgb.icc),
 % unless the user modifies the corresponding line below.
 % Define entries in the document Info dictionary :
 /ICCProfile (srgb.icc) % Customise
 def
 [ /Title (Title)       % Customise
  /DOCINFO pdfmark
 % Define an ICC profile :
 [/_objdef {icc_PDFA} /type /stream /OBJ pdfmark
 [{icc_PDFA}
 <<
  /N currentpagedevice /ProcessColorModel known {
    currentpagedevice /ProcessColorModel get dup /DeviceGray eq
    {pop 1} {
      /DeviceRGB eq
      {3}{4} ifelse
    } ifelse
  } {
    (ERROR, unable to determine ProcessColorModel) == flush
  } ifelse
 >> /PUT pdfmark
 [{icc_PDFA} ICCProfile (r) file /PUT pdfmark
 % Define the output intent dictionary :
 [/_objdef {OutputIntent_PDFA} /type /dict /OBJ pdfmark
 [{OutputIntent_PDFA} <<
  /Type /OutputIntent             % Must be so (the standard requires).
  /S /GTS_PDFA1                   % Must be so (the standard requires).
  /DestOutputProfile {icc_PDFA}            % Must be so (see above).
  /OutputConditionIdentifier (sRGB)      % Customize
 >> /PUT pdfmark
 [{Catalog} <</OutputIntents [ {OutputIntent_PDFA} ]>> /PUT pdfmark
--- a/kittengroomer/data/srgb.icc
+++ b/kittengroomer/data/srgb.icc
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@ -45,9 +45,13 @@ class FileBase(object):
        self.dst_path = dst_path
        self.log_details = {'filepath': self.src_path}
        self.log_string = ''
-        _, self.extension = os.path.splitext(self.src_path)
+        self._determine_extension()
        self._determine_mimetype()
    def _determine_extension(self):
        _, ext = os.path.splitext(self.src_path)
        self.extension = ext.lower()
    def _determine_mimetype(self):
        if os.path.islink(self.src_path):
            # magic will throw an IOError on a broken symlink
@ -55,6 +59,7 @@ class FileBase(object):
        else:
            try:
                mt = magic.from_file(self.src_path, mime=True)
                # magic will always return something, even if it's just 'data'
            except UnicodeEncodeError as e:
                # FIXME: The encoding of the file is broken (possibly UTF-16)
                mt = ''
@ -76,7 +81,6 @@ class FileBase(object):
        Returns False + updates log if self.main_type or self.sub_type
        are not set.
        """
        if not self.main_type or not self.sub_type:
            self.log_details.update({'broken_mime': True})
            return False
@ -88,16 +92,22 @@ class FileBase(object):
        Returns False + updates self.log_details if self.extension is not set.
        """
-        if not self.extension:
+        if self.extension == '':
            self.log_details.update({'no_extension': True})
            return False
        return True
    def is_dangerous(self):
        """Returns True if self.log_details contains 'dangerous'."""
-        if self.log_details.get('dangerous'):
+        return ('dangerous' in self.log_details)
-            return True
+
-        return False
+    def is_unknown(self):
        """Returns True if self.log_details contains 'unknown'."""
        return ('unknown' in self.log_details)
    def is_binary(self):
        """Returns True if self.log_details contains the key 'binary'."""
        # Only the presence of the key matters, not the value stored under it
        flagged = 'binary' in self.log_details
        return flagged
    def is_symlink(self):
        """Returns True and updates log if file is a symlink."""
@ -115,10 +125,9 @@ class FileBase(object):
        Marks a file as dangerous.
        Prepends and appends DANGEROUS to the destination file name
-        to avoid double-click of death.
+        to help prevent double-click of death.
        """
        if self.is_dangerous():
            # Already marked as dangerous, do nothing
            return
        self.log_details['dangerous'] = True
        path, filename = os.path.split(self.dst_path)
@ -126,8 +135,7 @@ class FileBase(object):
    def make_unknown(self):
        """Marks a file as an unknown type and prepends UNKNOWN to filename."""
-        if self.is_dangerous() or self.log_details.get('binary'):
+        if self.is_dangerous() or self.is_binary():
            # Already marked as dangerous or binary, do nothing
            return
        self.log_details['unknown'] = True
        path, filename = os.path.split(self.dst_path)
@ -136,7 +144,6 @@ class FileBase(object):
    def make_binary(self):
        """Marks a file as a binary and appends .bin to filename."""
        if self.is_dangerous():
            # Already marked as dangerous, do nothing
            return
        self.log_details['binary'] = True
        path, filename = os.path.split(self.dst_path)
@ -179,8 +186,8 @@ class KittenGroomerBase(object):
            self.log_debug_out = os.devnull
    def _computehash(self, path):
-        """Returns a sha1 hash of a file at a given path."""
+        """Returns a sha256 hash of a file at a given path."""
-        s = hashlib.sha1()
+        s = hashlib.sha256()
        with open(path, 'rb') as f:
            while True:
                buf = f.read(0x100000)
@ -260,9 +267,10 @@ class KittenGroomerBase(object):
    def _safe_metadata_split(self, ext):
        """Create a separate file to hold this file's metadata."""
        # TODO: fix logic in this method
        dst = self.cur_file.dst_path
        try:
-            if os.path.exists(self.cur_file.src_path + ext): # should we check dst_path as well?
+            if os.path.exists(self.cur_file.src_path + ext):  # should we check dst_path as well?
                raise KittenGroomerError("Cannot create split metadata file for \"" +
                                         self.cur_file.dst_path + "\", type '" +
                                         ext + "': File exists.")
--- a/playground/README.md
+++ b/playground/README.md
@ -1 +0,0 @@
 This directory contains extra files that may or may not be used in the project
--- a/playground/usb_lookup.py
+++ b/playground/usb_lookup.py
@ -1,16 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 from usb.core import find
 import usb.control
 def is_mass_storage(dev):
    """Return True if any item yielded by iterating `dev` has a class-8 interface.

    `dev` is presumably a PyUSB device whose iteration yields its
    configurations -- TODO confirm against the PyUSB version in use.
    Always returns a bool, so the function can be used as a predicate
    (e.g. as `custom_match`) without relying on an implicit None.
    """
    import usb.util
    for cfg in dev:
        # bInterfaceClass 8 is the USB-IF class code for Mass Storage
        if usb.util.find_descriptor(cfg, bInterfaceClass=8) is not None:
            return True
    # Explicit result instead of falling off the end and returning None
    return False
 for mass in find(find_all=True, custom_match=is_mass_storage):
    print(mass)
--- a/setup.py
+++ b/setup.py
@ -4,23 +4,21 @@ from setuptools import setup
 setup(
    name='kittengroomer',
-    version='2.0.2',
+    version='2.1',
    author='Raphaël Vinot',
    author_email='raphael.vinot@circl.lu',
    maintainer='Raphaël Vinot',
    url='https://github.com/CIRCL/CIRCLean',
    description='Standalone CIRCLean/KittenGroomer code.',
    packages=['kittengroomer'],
-    scripts=['bin/generic.py', 'bin/pier9.py', 'bin/specific.py', 'bin/filecheck.py'],
+    scripts=[
-    include_package_data=True,
+        'bin/filecheck.py'
-    package_data={'data': ['PDFA_def.ps', 'srgb.icc']},
+    ],
    test_suite="tests",
    classifiers=[
        'License :: OSI Approved :: BSD License',
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Intended Audience :: Science/Research',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Topic :: Communications :: File Sharing',
        'Topic :: Security',
--- a/tests/logging.py
+++ b/tests/logging.py
@ -0,0 +1,22 @@
 import os
 def save_logs(groomer, test_description):
    """Collect a groomer's log files into tests/test_logs/<test_description>.log.

    The processing log is always copied under a 'TEST LOG' header. When
    groomer.debug is truthy, the debug error and debug output logs are
    appended under 'ERR LOG' and 'OUT LOG' headers, each only if its file
    exists on disk.
    """
    header = '=' * 10 + '{}' + '=' * 10 + '\n'
    combined_path = 'tests/test_logs/{}.log'.format(test_description)
    with open(combined_path, 'w+') as combined:
        combined.write(header.format('TEST LOG'))
        with open(groomer.log_processing, 'r') as processing:
            combined.write(processing.read())
        if not groomer.debug:
            return
        # Debug logs are optional: skip whichever one was never created
        for title, attr_name in (('ERR LOG', 'log_debug_err'),
                                 ('OUT LOG', 'log_debug_out')):
            debug_path = getattr(groomer, attr_name)
            if not os.path.exists(debug_path):
                continue
            combined.write(header.format(title))
            with open(debug_path, 'r') as debug_log:
                combined.write(debug_log.read())
--- a/tests/oldtests.py
+++ b/tests/oldtests.py
@ -1,95 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import unittest
 import os
 import sys
 if __name__ == '__main__':
    sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
 from bin.specific import KittenGroomerSpec
 from bin.pier9 import KittenGroomerPier9
 from bin.generic import KittenGroomer
 if sys.version_info.major == 2:
    from bin.filecheck import KittenGroomerFileCheck
 from kittengroomer import FileBase
 class TestBasic(unittest.TestCase):
    def setUp(self):
        self.maxDiff = None
        self.curpath = os.getcwd()
    def dump_logs(self, kg):
        print(open(kg.log_processing, 'rb').read())
        if kg.debug:
            if os.path.exists(kg.log_debug_err):
                print(open(kg.log_debug_err, 'rb').read())
            if os.path.exists(kg.log_debug_out):
                print(open(kg.log_debug_out, 'rb').read())
    def test_specific_valid(self):
        src = os.path.join(self.curpath, 'tests/src2')
        dst = os.path.join(self.curpath, 'tests/dst')
        spec = KittenGroomerSpec(src, dst, debug=True)
        spec.processdir()
        self.dump_logs(spec)
    def test_specific_invalid(self):
        src = os.path.join(self.curpath, 'tests/src')
        dst = os.path.join(self.curpath, 'tests/dst')
        spec = KittenGroomerSpec(src, dst, debug=True)
        spec.processdir()
        self.dump_logs(spec)
    def test_pier9(self):
        src = os.path.join(self.curpath, 'tests/src')
        dst = os.path.join(self.curpath, 'tests/dst')
        spec = KittenGroomerPier9(src, dst, debug=True)
        spec.processdir()
        self.dump_logs(spec)
    def test_generic(self):
        src = os.path.join(self.curpath, 'tests/src2')
        dst = os.path.join(self.curpath, 'tests/dst')
        spec = KittenGroomer(src, dst, debug=True)
        spec.processdir()
        self.dump_logs(spec)
    def test_generic_2(self):
        src = os.path.join(self.curpath, 'tests/src')
        dst = os.path.join(self.curpath, 'tests/dst')
        spec = KittenGroomer(src, dst, debug=True)
        spec.processdir()
        self.dump_logs(spec)
    def test_filecheck(self):
        if sys.version_info.major >= 3:
            return
        src = os.path.join(self.curpath, 'tests/src')
        dst = os.path.join(self.curpath, 'tests/dst')
        spec = KittenGroomerFileCheck(src, dst, debug=True)
        spec.processdir()
        self.dump_logs(spec)
    def test_filecheck_2(self):
        if sys.version_info.major >= 3:
            return
        src = os.path.join(self.curpath, 'tests/src2')
        dst = os.path.join(self.curpath, 'tests/dst')
        spec = KittenGroomerFileCheck(src, dst, debug=True)
        spec.processdir()
        self.dump_logs(spec)
    def test_help_file(self):
        f = FileBase('tests/src/blah.conf', 'tests/dst/blah.conf')
        f.make_unknown()
        f.make_binary()
        f.make_unknown()
        f.make_dangerous()
        f.make_binary()
        f.make_dangerous()
--- a/tests/src_invalid/42.zip
+++ b/tests/src_invalid/42.zip
--- a/tests/src_invalid/autorun.inf
+++ b/tests/src_invalid/autorun.inf
@ -0,0 +1,4 @@
 [autorun]
 open=setup.exe
 icon=setup.ico
 label=My install CD
--- a/tests/src_invalid/blah.conf
+++ b/tests/src_invalid/blah.conf
--- a/tests/src_invalid/blah.tar.bz2
+++ b/tests/src_invalid/blah.tar.bz2
--- a/tests/src_invalid/blah.txt
+++ b/tests/src_invalid/blah.txt
--- a/tests/src_invalid/blah.zip
+++ b/tests/src_invalid/blah.zip
--- a/tests/src_invalid/foobar.dat
+++ b/tests/src_invalid/foobar.dat
--- a/tests/src_invalid/geneve_1564.pdf
+++ b/tests/src_invalid/geneve_1564.pdf
--- a/tests/src_invalid/geneve_1564_wrong_mime.conf
+++ b/tests/src_invalid/geneve_1564_wrong_mime.conf
--- a/tests/src_invalid/message.msg
+++ b/tests/src_invalid/message.msg
--- a/tests/src_invalid/ntree.wrl
+++ b/tests/src_invalid/ntree.wrl
--- a/tests/src_invalid/test.obj
+++ b/tests/src_invalid/test.obj
@ -0,0 +1 @@
 blah
--- a/tests/src_simple/blah.conf
+++ b/tests/src_simple/blah.conf
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@ -1,88 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import os
 import sys
 import pytest
 from bin.specific import KittenGroomerSpec
 from bin.pier9 import KittenGroomerPier9
 from bin.generic import KittenGroomer
 if sys.version_info.major == 2:
    from bin.filecheck import KittenGroomerFileCheck
 skip = pytest.mark.skip
 py2_only = pytest.mark.skipif(sys.version_info.major == 3,
                                reason="filecheck.py only runs on python 2")
@pytest.fixture
 def src_simple():
    return os.path.join(os.getcwd(), 'tests/src_simple')
@pytest.fixture
 def src_complex():
    return os.path.join(os.getcwd(), 'tests/src_complex')
@pytest.fixture
 def dst():
    return os.path.join(os.getcwd(), 'tests/dst')
 def test_specific_valid(src_simple, dst):
    spec = KittenGroomerSpec(src_simple, dst, debug=True)
    spec.processdir()
    dump_logs(spec)
 def test_specific_invalid(src_complex, dst):
    spec = KittenGroomerSpec(src_complex, dst, debug=True)
    spec.processdir()
    dump_logs(spec)
 def test_pier9(src_complex, dst):
    spec = KittenGroomerPier9(src_complex, dst, debug=True)
    spec.processdir()
    dump_logs(spec)
 def test_generic(src_simple, dst):
    spec = KittenGroomer(src_simple, dst, debug=True)
    spec.processdir()
    dump_logs(spec)
 def test_generic_2(src_complex, dst):
    spec = KittenGroomer(src_complex, dst, debug=True)
    spec.processdir()
    dump_logs(spec)
@py2_only
 def test_filecheck(src_complex, dst):
    spec = KittenGroomerFileCheck(src_complex, dst, debug=True)
    spec.processdir()
    dump_logs(spec)
@py2_only
 def test_filecheck_2(src_simple, dst):
    spec = KittenGroomerFileCheck(src_simple, dst, debug=True)
    spec.processdir()
    dump_logs(spec)
 ## Helper functions
 def dump_logs(spec):
    """Print the raw bytes of a groomer's log files to stdout.

    The processing log is always printed. When spec.debug is truthy, the
    debug error and debug output logs are printed as well, each only if
    its file exists. Every file is closed as soon as it has been read,
    rather than being left for the garbage collector.
    """
    def _dump(path):
        # Read in binary mode so undecodable log content cannot raise
        with open(path, 'rb') as log_file:
            print(log_file.read())

    _dump(spec.log_processing)
    if spec.debug:
        for debug_path in (spec.log_debug_err, spec.log_debug_out):
            if os.path.exists(debug_path):
                _dump(debug_path)
--- a/tests/test_filecheck.py
+++ b/tests/test_filecheck.py
@ -0,0 +1,48 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import os
 import pytest
 from tests.logging import save_logs
 try:
    from bin.filecheck import KittenGroomerFileCheck, File, main
    NODEPS = False
 except ImportError:
    NODEPS = True
 skipif_nodeps = pytest.mark.skipif(NODEPS,
                                   reason="Dependencies aren't installed")
@skipif_nodeps
 class TestIntegration:
    @pytest.fixture
    def src_valid(self):
        return os.path.join(os.getcwd(), 'tests/src_valid')
    @pytest.fixture
    def src_invalid(self):
        return os.path.join(os.getcwd(), 'tests/src_invalid')
    @pytest.fixture
    def dst(self):
        return os.path.join(os.getcwd(), 'tests/dst')
    def test_filecheck(self, src_invalid, dst):
        groomer = KittenGroomerFileCheck(src_invalid, dst, debug=True)
        groomer.processdir()
        test_description = "filecheck_invalid"
        save_logs(groomer, test_description)
    def test_filecheck_2(self, src_valid, dst):
        groomer = KittenGroomerFileCheck(src_valid, dst, debug=True)
        groomer.processdir()
        test_description = "filecheck_valid"
        save_logs(groomer, test_description)
 class TestFileHandling:
    pass
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@ -1,25 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import os
 import kittengroomer as kg
 import bin.specific as specific
 PATH = os.getcwd() + '/tests/'
 def test_base():
    assert kg.FileBase
    assert kg.KittenGroomerBase
    assert kg.main
 def test_help_file():
        f = kg.FileBase('tests/src_complex/blah.conf', 'tests/dst/blah.conf')
        f.make_unknown()
        f.make_binary()
        f.make_unknown()
        f.make_dangerous()
        f.make_binary()
        f.make_dangerous()
--- a/tests/test_kittengroomer.py
+++ b/tests/test_kittengroomer.py
@ -2,14 +2,12 @@
 # -*- coding: utf-8 -*-
 import os
 import sys
 import pytest
 from kittengroomer import FileBase, KittenGroomerBase
 from kittengroomer.helpers import ImplementationRequired
 PY3 = sys.version_info.major == 3
 skip = pytest.mark.skip
 xfail = pytest.mark.xfail
 fixture = pytest.fixture
@ -21,7 +19,7 @@ class TestFileBase:
    @fixture
    def source_file(self):
-        return 'tests/src_simple/blah.conf'
+        return 'tests/src_valid/blah.conf'
    @fixture
    def dest_file(self):
@ -84,23 +82,15 @@ class TestFileBase:
    # We should probably catch everytime that happens and tell the user explicitly happened (and maybe put it in the log)
    def test_create(self):
-        file = FileBase('tests/src_simple/blah.conf', '/tests/dst/blah.conf')
+        file = FileBase('tests/src_valid/blah.conf', '/tests/dst/blah.conf')
    def test_create_broken(self, tmpdir):
        with pytest.raises(TypeError):
            file_no_args = FileBase()
-        if PY3:
+        with pytest.raises(FileNotFoundError):
-            with pytest.raises(FileNotFoundError):
+            file_empty_args = FileBase('', '')
-                file_empty_args = FileBase('', '')
+        with pytest.raises(IsADirectoryError):
-        else:
+            file_directory = FileBase(tmpdir.strpath, tmpdir.strpath)
            with pytest.raises(IOError):
                file_empty_args = FileBase('', '')
        if PY3:
            with pytest.raises(IsADirectoryError):
                file_directory = FileBase(tmpdir.strpath, tmpdir.strpath)
        else:
            with pytest.raises(IOError):
                file_directory = FileBase(tmpdir.strpath, tmpdir.strpath)
        # are there other cases here? path to a file that doesn't exist? permissions?
    def test_init(self, generic_conf_file):
@ -113,6 +103,13 @@ class TestFileBase:
        # assert file.log_details == copied_log     # this fails for now, we need to make log_details undeletable
        # we should probably check for more extensions here
    def test_extension_uppercase(self, tmpdir):
        """An upper-case extension on disk is exposed lower-cased by FileBase."""
        upper_file = tmpdir.join('TEST.TXT')
        upper_file.write('testing')
        # The same path is passed as both source and destination
        path = upper_file.strpath
        assert FileBase(path, path).extension == '.txt'
    def test_mimetypes(self, generic_conf_file):
        assert generic_conf_file.has_mimetype()
        assert generic_conf_file.mimetype == 'text/plain'
@ -221,7 +218,7 @@ class TestKittenGroomerBase:
    @fixture
    def source_directory(self):
-        return 'tests/src_complex'
+        return 'tests/src_invalid'
    @fixture
    def dest_directory(self):
--- a/tests/test_logs/.keepdir
+++ b/tests/test_logs/.keepdir
--- a/tox.ini
+++ b/tox.ini
@ -1,5 +1,5 @@
 [tox]
-envlist=py27,py35
+envlist=py35
 [testenv]
 deps=-rdev-requirements.txt
-commands= pytest tests/test_helpers.py --cov=kittengroomer
+commands= pytest --cov=kittengroomer --cov=bin
`@ -1 +1 @@`
	`include kittengroomer/data/* README.md CONTRIBUTING.md CHANGELOG dev-requirements.txt`	`include README.md CONTRIBUTING.md CHANGELOG dev-requirements.txt`
		`@ -1 +0,0 @@`
			`This directory contains extra files that may or may not be used in the project`