Patch summary (preamble text; ignored by patch tools):

  * .travis.yml: upgrade lxml/exifread/pillow on install (pip -U) and raise
    the travis_wait limit for the test run from 30 to 60 minutes.
  * kittengroomer/helpers.py: record a UnicodeEncodeError raised by
    magic.from_file() in log_details instead of crashing, and split tree()
    into Python 2 / Python 3 variants because the log is opened in binary
    mode.
  * tests/test.py: print the raw log bytes instead of decoding them as UTF-8.

Review fix applied in __tree_py3: the directory header line used to format
an already-encoded bytes object into a str template, which on Python 3
writes the repr (b'name') into the log. The name is now formatted as text
and the whole line is encoded once, like the other writes in that function.

NOTE(review): this patch was recovered from a copy whose newlines and
leading whitespace had been collapsed. Hunk line counts match the headers,
but context-line indentation (YAML list indent, Python nesting) and the
run lengths of spaces inside the padding literals are reconstructed --
verify against the target tree before applying. The blob hashes on the
helpers.py "index" line predate the review fix.

diff --git a/.travis.yml b/.travis.yml
index 6102777..99ea777 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -43,7 +43,7 @@ install:
     - sudo apt-get install -y libreoffice libreoffice-script-provider-python unoconv
     # filecheck.py dependencies
     - sudo apt-get install libxml2-dev libxslt1-dev
-    - pip install lxml exifread pillow
+    - pip install -U lxml exifread pillow
     - pip install git+https://github.com/Rafiot/officedissector.git
     - if [ $(python -c 'import sys; print(sys.version_info.major)') == '2' ]; then pip install oletools olefile ; fi
     - wget https://didierstevens.com/files/software/pdfid_v0_2_1.zip
@@ -79,7 +79,7 @@ install:
     - popd

 script:
-    - travis_wait 30 coverage run --source=bin,kittengroomer setup.py test
+    - travis_wait 60 coverage run --source=bin,kittengroomer setup.py test

 notifications:
     email:
diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py
index 02df41b..3a20a74 100644
--- a/kittengroomer/helpers.py
+++ b/kittengroomer/helpers.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import os
+import sys
 import magic
 import hashlib
 import shutil
@@ -41,7 +42,12 @@ class FileBase(object):
             # magic will throw an IOError on a broken symlink
             self.mimetype = 'inode/symlink'
         else:
-            mt = magic.from_file(self.src_path, mime=True)
+            try:
+                mt = magic.from_file(self.src_path, mime=True)
+            except UnicodeEncodeError as e:
+                # FIXME: The encoding of the file is broken (possibly UTF-16)
+                mt = ''
+                self.log_details.update({'UnicodeError': e})
             try:
                 self.mimetype = mt.decode("utf-8")
             except:
@@ -168,6 +174,12 @@ class KittenGroomerBase(object):
         return s.hexdigest()

     def tree(self, base_dir, padding=' '):
+        if sys.version_info.major == 2:
+            self.__tree_py2(base_dir, padding)
+        else:
+            self.__tree_py3(base_dir, padding)
+
+    def __tree_py2(self, base_dir, padding=' '):
         with open(self.log_content, 'ab') as lf:
             lf.write('#' * 80 + '\n')
             lf.write('{}+- {}/\n'.format(padding, os.path.basename(os.path.abspath(base_dir))))
@@ -182,6 +194,21 @@ class KittenGroomerBase(object):
             elif os.path.isfile(curpath):
                 lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)))

+    def __tree_py3(self, base_dir, padding=' '):
+        with open(self.log_content, 'ab') as lf:
+            lf.write(bytes('#' * 80 + '\n', 'UTF-8'))
+            lf.write('{}+- {}/\n'.format(padding, os.path.basename(os.path.abspath(base_dir))).encode(errors='ignore'))
+            padding += '| '
+            files = sorted(os.listdir(base_dir))
+            for f in files:
+                curpath = os.path.join(base_dir, f)
+                if os.path.islink(curpath):
+                    lf.write('{}+-- {}\t- Symbolic link to {}\n'.format(padding, f, os.readlink(curpath)).encode(errors='ignore'))
+                elif os.path.isdir(curpath):
+                    self.tree(curpath, padding)
+                elif os.path.isfile(curpath):
+                    lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)).encode(errors='ignore'))
+
     # ##### Helpers #####
     def _safe_rmtree(self, directory):
         '''Remove a directory tree if it exists'''
diff --git a/tests/test.py b/tests/test.py
index fff934e..888d3de 100755
--- a/tests/test.py
+++ b/tests/test.py
@@ -25,12 +25,12 @@ class TestBasic(unittest.TestCase):
         self.curpath = os.getcwd()

     def dump_logs(self, kg):
-        print(open(kg.log_processing, 'rb').read().decode("utf-8"))
+        print(open(kg.log_processing, 'rb').read())
         if kg.debug:
             if os.path.exists(kg.log_debug_err):
-                print(open(kg.log_debug_err, 'rb').read().decode("utf-8"))
+                print(open(kg.log_debug_err, 'rb').read())
             if os.path.exists(kg.log_debug_out):
-                print(open(kg.log_debug_out, 'rb').read().decode("utf-8"))
+                print(open(kg.log_debug_out, 'rb').read())

     def test_specific_valid(self):
         src = os.path.join(self.curpath, 'tests/src2')