diff --git a/.gitignore b/.gitignore index 95e49f2..ecf6be3 100644 --- a/.gitignore +++ b/.gitignore @@ -67,8 +67,8 @@ target/ *.vrb # Project specific -/tests/dst/* -!/tests/logs/ -!/tests/.keepdir - - +tests/dst/* +tests/test_logs/* +!tests/**/.keepdir +!tests/src_invalid/* +!tests/src_valid/* diff --git a/.travis.yml b/.travis.yml index fc6e793..b778bf8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ language: python python: - - 2.7 - 3.3 - 3.4 - 3.5 @@ -17,8 +16,6 @@ addons: packages: # General dependencies - p7zip-full - # generic.py dependencies - - ghostscript # Testing dependencies - mercurial @@ -26,45 +23,28 @@ install: # General dependencies - sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu/ trusty multiverse" && sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu/ trusty-updates multiverse" - sudo apt-get update -qq - - sudo apt-get install -y p7zip-rar - # generic.py: pdf2htmlEX + dependencies - - sudo add-apt-repository ppa:fontforge/fontforge --yes - # to get a working 0.26 poppler - - sudo add-apt-repository ppa:delayargentina/delayx --yes - - sudo apt-get update -qq - - sudo apt-get install -y libpoppler-dev libpoppler-private-dev libspiro-dev libcairo-dev libpango1.0-dev libfreetype6-dev libltdl-dev libfontforge-dev python-imaging python-pip firefox xvfb - - git clone https://github.com/coolwanglu/pdf2htmlEX.git - - pushd pdf2htmlEX - - cmake -DCMAKE_INSTALL_PREFIX:PATH=/usr -DENABLE_SVG=ON . 
- - make - - sudo make install - - popd - # generic.py: Other dependencies - - sudo apt-get install -y libreoffice libreoffice-script-provider-python unoconv + - sudo apt-get install -y p7zip-rar python-pip # filecheck.py dependencies - sudo apt-get install libxml2-dev libxslt1-dev - wget https://didierstevens.com/files/software/pdfid_v0_2_1.zip - unzip pdfid_v0_2_1.zip - pip install -U pip - - pip install lxml exifread pillow - - pip install git+https://github.com/Rafiot/officedissector.git - - | - if [[ "$TRAVIS_PYTHON_VERSION" == 2* ]]; then - pip install -U oletools olefile - fi - # Module dependencies + - pip install lxml exifread pillow olefile + - pip install git+https://github.com/decalage2/oletools.git + - pip install git+https://github.com/grierforensics/officedissector.git + # PyCIRCLean dependencies - pip install -r dev-requirements.txt - pip install coveralls codecov # Testing dependencies - sudo apt-get install rar # Prepare tests - # Zoo + # Malware from theZoo - git clone https://github.com/Rafiot/theZoo.git - pushd theZoo/malwares/Binaries - python unpackall.py - popd - - mv theZoo/malwares/Binaries/out tests/src_complex/ - # Path traversal + - mv theZoo/malwares/Binaries/out tests/src_invalid/ + # Path traversal attacks - git clone https://github.com/jwilk/path-traversal-samples - pushd path-traversal-samples - pushd zip @@ -74,25 +54,25 @@ install: - make - popd - popd - - mv path-traversal-samples/zip/*.zip tests/src_complex/ - - mv path-traversal-samples/rar/*.rar tests/src_complex/ + - mv path-traversal-samples/zip/*.zip tests/src_invalid/ + - mv path-traversal-samples/rar/*.rar tests/src_invalid/ # Office docs - git clone https://github.com/eea/odfpy.git - - mv odfpy/tests/examples/* tests/src_complex/ - - pushd tests/src_complex/ + - mv odfpy/tests/examples/* tests/src_invalid/ + - pushd tests/src_invalid/ - wget https://bitbucket.org/decalage/olefileio_pl/raw/3073963b640935134ed0da34906fea8e506460be/Tests/images/test-ole-file.doc - wget 
--no-check-certificate https://www.officedissector.com/corpus/fraunhoferlibrary.zip - unzip -o fraunhoferlibrary.zip - rm fraunhoferlibrary.zip - - 7z x 42.zip -p42 + - 7z x -p42 42.zip + # Some random samples - wget http://www.sample-videos.com/audio/mp3/india-national-anthem.mp3 - wget http://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4 - wget http://thewalter.net/stef/software/rtfx/sample.rtf - - echo "blah" > test.obj - popd script: - - travis_wait 60 py.test --cov=kittengroomer --cov=bin tests/ + - travis_wait 30 py.test --cov=kittengroomer --cov=bin tests/ notifications: email: diff --git a/CHANGELOG b/CHANGELOG deleted file mode 100644 index 7a1ad90..0000000 --- a/CHANGELOG +++ /dev/null @@ -1,9 +0,0 @@ -Changelog -========= - -2.1.0 ---- - -New features: - -Fixes: diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..df1a217 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,19 @@ +Changelog +========= + +2.1.0 +--- + +New features: +- Dropped Python 2.7 support: PyCIRCLean is now Python 3.3+ only +- Tests are now easier to write and run: we have support for pytest and tox! +- More documentation: both docstrings and more detailed readmes +- Added more types of examples for testing +- The Travis build now runs in ~10 minutes vs. ~30 minutes before + + +Fixes: +- Extension matching now catches lower/upper case errors +- Fixed remaining python 3 issues with filecheck.py +- Fixed support for .rtf files +- Many other small filetype related fixes diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c27f429..e514393 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -29,5 +29,13 @@ or if you have an example you'd like to contribute. Running the tests ================= -* Running the tests is easy. First, make sure you've installed the project and testing dependencies. -Then, run `python -m pytest` or just `pytest` in the top level or /tests directory. +* Running the tests is fairly straightforward. 
+* First, make sure you've installed the project and testing dependencies. +* Then, run `python -m pytest` or just `pytest` in the top level directory of the module. +* Each integration test that runs will generate a timestamped copy of the log for that run +in the tests/test_logs directory. +* If you'd like to get information about code coverage, run the tests using +`pytest --cov=kittengroomer`. +* You can test with multiple versions of Python if you have them installed +by running `pip install tox` and then `tox`. Make sure you modify "envlist" +in tox.ini for the Python versions you plan to use. diff --git a/MANIFEST.in b/MANIFEST.in index 4c93c9d..3194a32 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -include kittengroomer/data/* README.md CONTRIBUTING.md CHANGELOG dev-requirements.txt +include README.md CONTRIBUTING.md CHANGELOG dev-requirements.txt diff --git a/README.md b/README.md index 91583e2..19eb6d3 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ PyCIRCLean is the core Python code used by [CIRCLean](https://github.com/CIRCL/Circlean/), an open-source USB key and document sanitizer created by [CIRCL](https://www.circl.lu/). This module has been separated from the device-specific scripts and can be used for dedicated security applications to sanitize documents from hostile environments -to trusted environments. +to trusted environments. PyCIRCLean is currently Python 3.3+ only. # Installation @@ -26,7 +26,7 @@ pip install . PyCIRCLean is a simple Python library to handle file checking and sanitization. PyCIRCLean is designed as a simple library that can be overloaded to cover specific checking and sanitization workflows in different organizations like industrial environments or restricted/classified ICT environments. A series of practical examples utilizing PyCIRCLean can be found -in the [./bin](./bin) directory. +in the [./examples](./examples) directory. 
The following simple example using PyCIRCLean will only copy files with a .conf extension matching the 'text/plain' MIME type. If any other file is found in the source directory, the files won't be copied to the destination directory. diff --git a/bin/README.md b/bin/README.md index f37894e..f509f34 100644 --- a/bin/README.md +++ b/bin/README.md @@ -1,70 +1,33 @@ -Example scripts -=============== - -These are a series of example scripts designed to demonstrate PyCIRCLean's capabilities. Feel free to -adapt or modify any of them to suit your requirements. In order to use any of these scripts, you will need to -install the PyCIRCLean dependencies (preferably in a virtualenv): - -``` - pip install git+https://github.com/ahupp/python-magic.git # we cannot use the PyPi package for now due to a bug - python setup.py install # from the root of the repository -``` - -Requirements per script -======================= - filecheck.py ------------- +============ -*WARNING*: Only works with Python 2.7 (oletools and olefile aren't ported to Python3 for now) +This is the script used by the [CIRCLean](https://github.com/CIRCL/Circlean) +USB key sanitizer. It is designed to handle a range of file types, and will +mark them as dangerous if they meet certain criteria. -Requirements by type of document: +Before installing the filecheck.py dependencies, make sure to install the PyCIRCLean +dependencies: + +``` + pip install . +``` + +Dependencies by type of document: * Microsoft office: oletools, olefile * OOXML: officedissector * PDF: pdfid * Archives: p7zip-full, p7zip-rar +* Metadata: exifread +* Images: pillow +Note: pdfid is not installable with pip. It must be downloaded and installed +manually in the directory where filecheck will be run. 
``` sudo apt-get install p7zip-full p7zip-rar libxml2-dev libxslt1-dev - pip install lxml officedissector git+https://github.com/ahupp/python-magic.git oletools olefile + pip install lxml oletools olefile pillow exifread pip install git+https://github.com/Rafiot/officedissector.git - # pdfid is not a package, installing manually + # installing pdfid manually wget https://didierstevens.com/files/software/pdfid_v0_2_1.zip unzip pdfid_v0_2_1.zip - python setup.py -q install ``` - -generic.py ----------- - -Requirements by type of document: -* Office and all text files: unoconv, libreoffice -* PDF: ghostscript, pdf2htmlEX - -``` - # required for pdf2htmlEX - sudo add-apt-repository ppa:fontforge/fontforge --yes - sudo add-apt-repository ppa:coolwanglu/pdf2htmlex --yes - sudo apt-get update -qq - sudo apt-get install -qq libpoppler-dev libpoppler-private-dev libspiro-dev libcairo-dev libpango1.0-dev libfreetype6-dev libltdl-dev libfontforge-dev python-imaging python-pip firefox xvfb - # install pdf2htmlEX - git clone https://github.com/coolwanglu/pdf2htmlEX.git - pushd pdf2htmlEX - cmake -DCMAKE_INSTALL_PREFIX:PATH=/usr -DENABLE_SVG=ON . - make - sudo make install - popd - # Installing the rest - sudo apt-get install ghostscript p7zip-full p7zip-rar libreoffice unoconv -``` - -pier9.py --------- - -No external dependencies required. - -specific.py ------------ - -No external dependencies required. 
diff --git a/bin/filecheck.py b/bin/filecheck.py index 1d3873c..04a599c 100644 --- a/bin/filecheck.py +++ b/bin/filecheck.py @@ -1,11 +1,9 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- import os -import sys import mimetypes import shlex import subprocess -import time import zipfile import oletools.oleid @@ -21,8 +19,7 @@ from pdfid import PDFiD, cPDFiD from kittengroomer import FileBase, KittenGroomerBase, main -SEVENZ = '/usr/bin/7z' -PY3 = sys.version_info.major == 3 +SEVENZ_PATH = '/usr/bin/7z' # Prepare application/ @@ -41,7 +38,7 @@ mimes_data = ['octet-stream'] mimes_exif = ['image/jpeg', 'image/tiff'] mimes_png = ['image/png'] -# Mime types we can pull metadata from +# Mimetypes we can pull metadata from mimes_metadata = ['image/jpeg', 'image/tiff', 'image/png'] # Aliases @@ -62,7 +59,7 @@ propertype = {'.gz': 'application/gzip'} # Commonly used malicious extensions # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/ # https://github.com/wiregit/wirecode/blob/master/components/core-settings/src/main/java/org/limewire/core/settings/FilterSettings.java -mal_ext = ( +MAL_EXTS = ( # Applications ".exe", ".pif", ".application", ".gadget", ".msi", ".msp", ".com", ".scr", ".hta", ".cpl", ".msc", ".jar", @@ -86,55 +83,58 @@ mal_ext = ( class File(FileBase): def __init__(self, src_path, dst_path): - ''' Init file object, set the mimetype ''' super(File, self).__init__(src_path, dst_path) - self.is_recursive = False - if not self.has_mimetype(): - # No mimetype, should not happen. 
- self.make_dangerous() - - if not self.has_extension(): - self.make_dangerous() - - if self.extension in mal_ext: - self.log_details.update({'malicious_extension': self.extension}) - self.make_dangerous() - + self._check_dangerous() if self.is_dangerous(): return self.log_details.update({'maintype': self.main_type, 'subtype': self.sub_type, 'extension': self.extension}) + self._check_extension() + self._check_mime() - # Check correlation known extension => actual mime type + def _check_dangerous(self): + if not self.has_mimetype(): + # No mimetype, should not happen. + self.make_dangerous() + if not self.has_extension(): + self.make_dangerous() + if self.extension in MAL_EXTS: + self.log_details.update({'malicious_extension': self.extension}) + self.make_dangerous() + + def _check_extension(self): + """Guesses the file's mimetype based on its extension. If the file's + extension is listed in the mimetypes module's types_map and the + expected mimetype based on that extension differs from the + mimetype determined by libmagic, then it marks the file as + dangerous.""" if propertype.get(self.extension) is not None: expected_mimetype = propertype.get(self.extension) else: expected_mimetype, encoding = mimetypes.guess_type(self.src_path, strict=False) if aliases.get(expected_mimetype) is not None: expected_mimetype = aliases.get(expected_mimetype) - is_known_extension = self.extension in mimetypes.types_map.keys() if is_known_extension and expected_mimetype != self.mimetype: self.log_details.update({'expected_mimetype': expected_mimetype}) self.make_dangerous() - # check correlation actual mime type => known extensions + def _check_mime(self): + """Takes the mimetype (as determined by libmagic) and determines + whether the list of extensions that are normally associated with + that mimetype contains the file's actual extension.""" if aliases.get(self.mimetype) is not None: mimetype = aliases.get(self.mimetype) else: mimetype 
= self.mimetype - expected_extensions = mimetypes.guess_all_extensions(mimetype, strict=False) if expected_extensions: if len(self.extension) > 0 and self.extension not in expected_extensions: self.log_details.update({'expected_extensions': expected_extensions}) self.make_dangerous() - else: - # there are no known extensions associated to this mimetype. - pass def has_metadata(self): if self.mimetype in mimes_metadata: @@ -144,18 +144,14 @@ class File(FileBase): class KittenGroomerFileCheck(KittenGroomerBase): - def __init__(self, root_src=None, root_dst=None, max_recursive=2, debug=False): - ''' - Initialize the basics of the conversion process - ''' + def __init__(self, root_src=None, root_dst=None, max_recursive_depth=2, debug=False): if root_src is None: root_src = os.path.join(os.sep, 'media', 'src') if root_dst is None: root_dst = os.path.join(os.sep, 'media', 'dst') super(KittenGroomerFileCheck, self).__init__(root_src, root_dst, debug) - - self.recursive = 0 - self.max_recursive = max_recursive + self.recursive_archive_depth = 0 + self.max_recursive_depth = max_recursive_depth subtypes_apps = [ (mimes_office, self._winoffice), @@ -189,21 +185,18 @@ class KittenGroomerFileCheck(KittenGroomerBase): 'inode': self.inode, } - # ##### Helpers ##### + # ##### Helper functions ##### def _init_subtypes_application(self, subtypes_application): - ''' - Create the Dict to pick the right function based on the sub mime type - ''' - to_return = {} - for list_subtypes, fct in subtypes_application: + """Creates a dictionary with the right method based on the sub mime type.""" + subtype_dict = {} + for list_subtypes, func in subtypes_application: for st in list_subtypes: - to_return[st] = fct - return to_return + subtype_dict[st] = func + return subtype_dict def _print_log(self): - ''' - Print the logs related to the current file being processed - ''' + """Print the logs related to the current file being processed.""" + # TODO: change name to _write_log tmp_log = 
self.log_name.fields(**self.cur_file.log_details) if self.cur_file.is_dangerous(): tmp_log.warning(self.cur_file.log_string) @@ -212,66 +205,53 @@ class KittenGroomerFileCheck(KittenGroomerBase): else: tmp_log.debug(self.cur_file.log_string) - def _run_process(self, command_line, timeout=0, background=False): - '''Run subprocess, wait until it finishes''' - if timeout != 0: - deadline = time.time() + timeout - else: - deadline = None - args = shlex.split(command_line) + def _run_process(self, command_string, timeout=None): + """Run command_string in a subprocess, wait until it finishes.""" + args = shlex.split(command_string) with open(self.log_debug_err, 'ab') as stderr, open(self.log_debug_out, 'ab') as stdout: - p = subprocess.Popen(args, stdout=stdout, stderr=stderr) - if background: - # This timer is here to make sure the unoconv listener is properly started. - time.sleep(10) - return True - while True: - code = p.poll() - if code is not None: - break - if deadline is not None and time.time() > deadline: - p.kill() - break - time.sleep(1) + try: + subprocess.check_call(args, stdout=stdout, stderr=stderr, timeout=timeout) + except (subprocess.TimeoutExpired, subprocess.CalledProcessError): + return return True ####################### - - # ##### Discarded mime types, reason in the comments ###### + # ##### Discarded mimetypes, reason in the docstring ###### def inode(self): - ''' Usually empty file. No reason (?) 
to copy it on the dest key''' + """Empty file or symlink.""" if self.cur_file.is_symlink(): - self.cur_file.log_string += 'Symlink to {}'.format(self.log_details['symlink']) + self.cur_file.log_string += 'Symlink to {}'.format(self.cur_file.log_details['symlink']) else: self.cur_file.log_string += 'Inode file' def unknown(self): - ''' This main type is unknown, that should not happen ''' + """Main type should never be unknown.""" self.cur_file.log_string += 'Unknown file' def example(self): - '''Used in examples, should never be returned by libmagic''' + """Used in examples, should never be returned by libmagic.""" self.cur_file.log_string += 'Example file' def multipart(self): - '''Used in web apps, should never be returned by libmagic''' + """Used in web apps, should never be returned by libmagic""" self.cur_file.log_string += 'Multipart file' - # ##### Threated as malicious, no reason to have it on a USB key ###### + # ##### Treated as malicious, no reason to have it on a USB key ###### def message(self): - '''Way to process message file''' + """Process a message file.""" self.cur_file.log_string += 'Message file' self.cur_file.make_dangerous() self._safe_copy() def model(self): - '''Way to process model file''' + """Process a model file.""" self.cur_file.log_string += 'Model file' self.cur_file.make_dangerous() self._safe_copy() - # ##### Converted ###### + # ##### Files that will be converted ###### def text(self): + """Process an rtf, ooxml, or plaintext file.""" for r in mimes_rtf: if r in self.cur_file.sub_type: self.cur_file.log_string += 'Rich Text file' @@ -289,7 +269,7 @@ class KittenGroomerFileCheck(KittenGroomerBase): self._safe_copy() def application(self): - ''' Everything can be there, using the subtype to decide ''' + """Processes an application specific file according to its subtype.""" for subtype, fct in self.subtypes_application.items(): if subtype in self.cur_file.sub_type: fct() @@ -299,12 +279,13 @@ class 
KittenGroomerFileCheck(KittenGroomerBase): self._unknown_app() def _executables(self): - '''Way to process executable file''' + """Processes an executable file.""" self.cur_file.add_log_details('processing_type', 'executable') self.cur_file.make_dangerous() self._safe_copy() def _winoffice(self): + """Processes a winoffice file using olefile/oletools.""" self.cur_file.add_log_details('processing_type', 'WinOffice') # Try as if it is a valid document oid = oletools.oleid.OleID(self.cur_file.src_path) @@ -343,6 +324,7 @@ class KittenGroomerFileCheck(KittenGroomerBase): self._safe_copy() def _ooxml(self): + """Processes an ooxml file.""" self.cur_file.add_log_details('processing_type', 'ooxml') try: doc = officedissector.doc.Document(self.cur_file.src_path) @@ -369,6 +351,7 @@ class KittenGroomerFileCheck(KittenGroomerBase): self._safe_copy() def _libreoffice(self): + """Processes a libreoffice file.""" self.cur_file.add_log_details('processing_type', 'libreoffice') # As long as there ar no way to do a sanity check on the files => dangerous try: @@ -385,55 +368,69 @@ class KittenGroomerFileCheck(KittenGroomerBase): self._safe_copy() def _pdf(self): - '''Way to process PDF file''' + """Processes a PDF file.""" self.cur_file.add_log_details('processing_type', 'pdf') xmlDoc = PDFiD(self.cur_file.src_path) oPDFiD = cPDFiD(xmlDoc, True) # TODO: other keywords? 
- if oPDFiD.encrypt > 0: + if oPDFiD.encrypt.count > 0: self.cur_file.add_log_details('encrypted', True) self.cur_file.make_dangerous() - if oPDFiD.js > 0 or oPDFiD.javascript > 0: + if oPDFiD.js.count > 0 or oPDFiD.javascript.count > 0: self.cur_file.add_log_details('javascript', True) self.cur_file.make_dangerous() - if oPDFiD.aa > 0 or oPDFiD.openaction > 0: + if oPDFiD.aa.count > 0 or oPDFiD.openaction.count > 0: self.cur_file.add_log_details('openaction', True) self.cur_file.make_dangerous() - if oPDFiD.richmedia > 0: + if oPDFiD.richmedia.count > 0: self.cur_file.add_log_details('flash', True) self.cur_file.make_dangerous() - if oPDFiD.launch > 0: + if oPDFiD.launch.count > 0: self.cur_file.add_log_details('launch', True) self.cur_file.make_dangerous() def _archive(self): - '''Way to process Archive''' + """Processes an archive using 7zip. The archive is extracted to a + temporary directory and self.processdir is called on that directory. + The recursive archive depth is increased to protect against archive + bombs.""" self.cur_file.add_log_details('processing_type', 'archive') self.cur_file.is_recursive = True self.cur_file.log_string += 'Archive extracted, processing content.' 
tmpdir = self.cur_file.dst_path + '_temp' self._safe_mkdir(tmpdir) - extract_command = '{} -p1 x "{}" -o"{}" -bd -aoa'.format(SEVENZ, self.cur_file.src_path, tmpdir) + extract_command = '{} -p1 x "{}" -o"{}" -bd -aoa'.format(SEVENZ_PATH, self.cur_file.src_path, tmpdir) self._run_process(extract_command) - self.recursive += 1 + self.recursive_archive_depth += 1 self.tree(tmpdir) self.processdir(tmpdir, self.cur_file.dst_path) - self.recursive -= 1 + self.recursive_archive_depth -= 1 self._safe_rmtree(tmpdir) + def _handle_archivebomb(self, src_dir): + self.cur_file.make_dangerous() + self.cur_file.add_log_details('Archive Bomb', True) + self.log_name.warning('ARCHIVE BOMB.') + self.log_name.warning('The content of the archive contains recursively other archives.') + self.log_name.warning('This is a bad sign so the archive is not extracted to the destination key.') + self._safe_rmtree(src_dir) + if src_dir.endswith('_temp'): + bomb_path = src_dir[:-len('_temp')] + self._safe_remove(bomb_path) + def _unknown_app(self): - '''Way to process an unknown file''' + """Processes an unknown file.""" self.cur_file.make_unknown() self._safe_copy() def _binary_app(self): - '''Way to process an unknown binary file''' + """Processes an unknown binary file.""" self.cur_file.make_binary() self._safe_copy() ####################### # Metadata extractors - def _metadata_exif(self, metadataFile): + def _metadata_exif(self, metadata_file): img = open(self.cur_file.src_path, 'rb') tags = None @@ -459,11 +456,11 @@ class KittenGroomerFileCheck(KittenGroomerBase): # Exifreader truncates data. if len(printable) > 25 and printable.endswith(", ... 
]"): value = tags[tag].values - if isinstance(value, basestring): + if isinstance(value, str): printable = value else: printable = str(value) - metadataFile.write("Key: {}\tValue: {}\n".format(tag, printable)) + metadata_file.write("Key: {}\tValue: {}\n".format(tag, printable)) self.cur_file.add_log_details('metadata', 'exif') img.close() return True @@ -487,22 +484,36 @@ class KittenGroomerFileCheck(KittenGroomerBase): return False def extract_metadata(self): - metadataFile = self._safe_metadata_split(".metadata.txt") - success = self.metadata_processing_options.get(self.cur_file.mimetype)(metadataFile) - metadataFile.close() + metadata_file = self._safe_metadata_split(".metadata.txt") + success = self.metadata_processing_options.get(self.cur_file.mimetype)(metadata_file) + metadata_file.close() if not success: # FIXME Delete empty metadata file pass ####################### - # ##### Not converted, checking the mime type ###### + # ##### Media - audio and video aren't converted ###### def audio(self): - '''Way to process an audio file''' + """Processes an audio file.""" self.cur_file.log_string += 'Audio file' self._media_processing() + def video(self): + """Processes a video.""" + self.cur_file.log_string += 'Video file' + self._media_processing() + + def _media_processing(self): + """Generic way to process all media files.""" + self.cur_file.add_log_details('processing_type', 'media') + self._safe_copy() + def image(self): - '''Way to process an image''' + """Processes an image. + + Extracts metadata if metadata is present. 
Creates a temporary + directory, opens the image using PIL.Image, saves it to the temporary + directory, and copies it to the destination.""" if self.cur_file.has_metadata(): self.extract_metadata() @@ -534,52 +545,40 @@ class KittenGroomerFileCheck(KittenGroomerBase): self.cur_file.log_string += 'Image file' self.cur_file.add_log_details('processing_type', 'image') - def video(self): - '''Way to process a video''' - self.cur_file.log_string += 'Video file' - self._media_processing() - - def _media_processing(self): - '''Generic way to process all the media files''' - self.cur_file.add_log_details('processing_type', 'media') - self._safe_copy() - ####################### + def process_file(self, srcpath, dstpath, relative_path): + self.cur_file = File(srcpath, dstpath) + self.log_name.info('Processing {} ({}/{})', + relative_path, + self.cur_file.main_type, + self.cur_file.sub_type) + if not self.cur_file.is_dangerous(): + self.mime_processing_options.get(self.cur_file.main_type, self.unknown)() + else: + self._safe_copy() + if not self.cur_file.is_recursive: + self._print_log() + def processdir(self, src_dir=None, dst_dir=None): - ''' - Main function doing the processing - ''' + """Main function coordinating file processing.""" if src_dir is None: src_dir = self.src_root_dir if dst_dir is None: dst_dir = self.dst_root_dir - if self.recursive > 0: + if self.recursive_archive_depth > 0: self._print_log() - if self.recursive >= self.max_recursive: - self.cur_file.make_dangerous() - self.cur_file.add_log_details('Archive Bomb', True) - self.log_name.warning('ARCHIVE BOMB.') - self.log_name.warning('The content of the archive contains recursively other archives.') - self.log_name.warning('This is a bad sign so the archive is not extracted to the destination key.') - self._safe_rmtree(src_dir) - if src_dir.endswith('_temp'): - archbomb_path = src_dir[:-len('_temp')] - self._safe_remove(archbomb_path) + if self.recursive_archive_depth >= self.max_recursive_depth: + 
self._handle_archivebomb(src_dir) for srcpath in self._list_all_files(src_dir): - self.cur_file = File(srcpath, srcpath.replace(src_dir, dst_dir)) + dstpath = srcpath.replace(src_dir, dst_dir) + relative_path = srcpath.replace(src_dir + '/', '') + # which path do we want in the log? + self.process_file(srcpath, dstpath, relative_path) - self.log_name.info('Processing {} ({}/{})', srcpath.replace(src_dir + '/', ''), - self.cur_file.main_type, self.cur_file.sub_type) - if not self.cur_file.is_dangerous(): - self.mime_processing_options.get(self.cur_file.main_type, self.unknown)() - else: - self._safe_copy() - if not self.cur_file.is_recursive: - self._print_log() if __name__ == '__main__': - main(KittenGroomerFileCheck, 'Generic version of the KittenGroomer. Convert and rename files.') + main(KittenGroomerFileCheck, 'File sanitizer used in CIRCLean. Renames potentially dangerous files.') diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..933e1d9 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,56 @@ +Examples +======== + +These are several sanitizers that demonstrate PyCIRCLean's capabilities. Feel free to +adapt or modify any of them to suit your requirements. In order to use any of these scripts, +you will first need to install the PyCIRCLean dependencies (preferably in a virtualenv): + +``` + pip install . +``` + +Requirements per script +======================= + +generic.py +---------- + +This is a script that was used by an older version of CIRCLean. 
+ +Requirements by type of document: +* Office and all text files: unoconv, libreoffice +* PDF: ghostscript, pdf2htmlEX + +``` + # required for pdf2htmlEX + sudo add-apt-repository ppa:fontforge/fontforge --yes + sudo add-apt-repository ppa:coolwanglu/pdf2htmlex --yes + sudo apt-get update -qq + sudo apt-get install -qq libpoppler-dev libpoppler-private-dev libspiro-dev libcairo-dev libpango1.0-dev libfreetype6-dev libltdl-dev libfontforge-dev python-imaging python-pip firefox xvfb + # install pdf2htmlEX + git clone https://github.com/coolwanglu/pdf2htmlEX.git + pushd pdf2htmlEX + cmake -DCMAKE_INSTALL_PREFIX:PATH=/usr -DENABLE_SVG=ON . + make + sudo make install + popd + # Installing the rest + sudo apt-get install ghostscript p7zip-full p7zip-rar libreoffice unoconv +``` + +pier9.py +-------- + +This script contains a list of file formats for various brands of industrial +manufacturing equipment, such as 3d printers, CNC machines, etc. It only +copies files that match these file formats. + +No external dependencies required. + +specific.py +----------- + +As the name suggests, this script copies only specific file formats according +to the configuration provided by the user. + +No external dependencies required. diff --git a/bin/generic.py b/examples/generic.py similarity index 100% rename from bin/generic.py rename to examples/generic.py diff --git a/bin/pier9.py b/examples/pier9.py similarity index 100% rename from bin/pier9.py rename to examples/pier9.py diff --git a/bin/specific.py b/examples/specific.py similarity index 100% rename from bin/specific.py rename to examples/specific.py diff --git a/kittengroomer/data/PDFA_def.ps b/kittengroomer/data/PDFA_def.ps deleted file mode 100644 index f0ff0d1..0000000 --- a/kittengroomer/data/PDFA_def.ps +++ /dev/null @@ -1,40 +0,0 @@ -%! -% This is a sample prefix file for creating a PDF/A document. -% Feel free to modify entries marked with "Customize". 
-% This assumes an ICC profile to reside in the file (ISO Coated sb.icc), -% unless the user modifies the corresponding line below. - -% Define entries in the document Info dictionary : -/ICCProfile (srgb.icc) % Customise -def - -[ /Title (Title) % Customise - /DOCINFO pdfmark - -% Define an ICC profile : - -[/_objdef {icc_PDFA} /type /stream /OBJ pdfmark -[{icc_PDFA} -<< - /N currentpagedevice /ProcessColorModel known { - currentpagedevice /ProcessColorModel get dup /DeviceGray eq - {pop 1} { - /DeviceRGB eq - {3}{4} ifelse - } ifelse - } { - (ERROR, unable to determine ProcessColorModel) == flush - } ifelse ->> /PUT pdfmark -[{icc_PDFA} ICCProfile (r) file /PUT pdfmark - -% Define the output intent dictionary : - -[/_objdef {OutputIntent_PDFA} /type /dict /OBJ pdfmark -[{OutputIntent_PDFA} << - /Type /OutputIntent % Must be so (the standard requires). - /S /GTS_PDFA1 % Must be so (the standard requires). - /DestOutputProfile {icc_PDFA} % Must be so (see above). - /OutputConditionIdentifier (sRGB) % Customize ->> /PUT pdfmark -[{Catalog} <> /PUT pdfmark diff --git a/kittengroomer/data/srgb.icc b/kittengroomer/data/srgb.icc deleted file mode 100644 index 627e8fe..0000000 Binary files a/kittengroomer/data/srgb.icc and /dev/null differ diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py index 990f899..4e82a73 100644 --- a/kittengroomer/helpers.py +++ b/kittengroomer/helpers.py @@ -45,9 +45,13 @@ class FileBase(object): self.dst_path = dst_path self.log_details = {'filepath': self.src_path} self.log_string = '' - _, self.extension = os.path.splitext(self.src_path) + self._determine_extension() self._determine_mimetype() + def _determine_extension(self): + _, ext = os.path.splitext(self.src_path) + self.extension = ext.lower() + def _determine_mimetype(self): if os.path.islink(self.src_path): # magic will throw an IOError on a broken symlink @@ -55,6 +59,7 @@ class FileBase(object): else: try: mt = magic.from_file(self.src_path, mime=True) + # magic will 
always return something, even if it's just 'data' except UnicodeEncodeError as e: # FIXME: The encoding of the file is broken (possibly UTF-16) mt = '' @@ -76,7 +81,6 @@ class FileBase(object): Returns False + updates log if self.main_type or self.sub_type are not set. """ - if not self.main_type or not self.sub_type: self.log_details.update({'broken_mime': True}) return False @@ -88,16 +92,22 @@ class FileBase(object): Returns False + updates self.log_details if self.extension is not set. """ - if not self.extension: + if self.extension == '': self.log_details.update({'no_extension': True}) return False return True def is_dangerous(self): """Returns True if self.log_details contains 'dangerous'.""" - if self.log_details.get('dangerous'): - return True - return False + return ('dangerous' in self.log_details) + + def is_unknown(self): + """Returns True if self.log_details contains 'unknown'.""" + return ('unknown' in self.log_details) + + def is_binary(self): + """returns True if self.log_details contains 'binary'.""" + return ('binary' in self.log_details) def is_symlink(self): """Returns True and updates log if file is a symlink.""" @@ -115,10 +125,9 @@ class FileBase(object): Marks a file as dangerous. Prepends and appends DANGEROUS to the destination file name - to avoid double-click of death. + to help prevent double-click of death. 
""" if self.is_dangerous(): - # Already marked as dangerous, do nothing return self.log_details['dangerous'] = True path, filename = os.path.split(self.dst_path) @@ -126,8 +135,7 @@ class FileBase(object): def make_unknown(self): """Marks a file as an unknown type and prepends UNKNOWN to filename.""" - if self.is_dangerous() or self.log_details.get('binary'): - # Already marked as dangerous or binary, do nothing + if self.is_dangerous() or self.is_binary(): return self.log_details['unknown'] = True path, filename = os.path.split(self.dst_path) @@ -136,7 +144,6 @@ class FileBase(object): def make_binary(self): """Marks a file as a binary and appends .bin to filename.""" if self.is_dangerous(): - # Already marked as dangerous, do nothing return self.log_details['binary'] = True path, filename = os.path.split(self.dst_path) @@ -179,8 +186,8 @@ class KittenGroomerBase(object): self.log_debug_out = os.devnull def _computehash(self, path): - """Returns a sha1 hash of a file at a given path.""" - s = hashlib.sha1() + """Returns a sha256 hash of a file at a given path.""" + s = hashlib.sha256() with open(path, 'rb') as f: while True: buf = f.read(0x100000) @@ -260,9 +267,10 @@ class KittenGroomerBase(object): def _safe_metadata_split(self, ext): """Create a separate file to hold this file's metadata.""" + # TODO: fix logic in this method dst = self.cur_file.dst_path try: - if os.path.exists(self.cur_file.src_path + ext): # should we check dst_path as well? + if os.path.exists(self.cur_file.src_path + ext): # should we check dst_path as well? 
raise KittenGroomerError("Cannot create split metadata file for \"" + self.cur_file.dst_path + "\", type '" + ext + "': File exists.") diff --git a/playground/README.md b/playground/README.md deleted file mode 100644 index 76a9248..0000000 --- a/playground/README.md +++ /dev/null @@ -1 +0,0 @@ -This directory contains extra files that may or may not be used in the project diff --git a/playground/usb_lookup.py b/playground/usb_lookup.py deleted file mode 100644 index 76f14d7..0000000 --- a/playground/usb_lookup.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from usb.core import find -import usb.control - - -def is_mass_storage(dev): - import usb.util - for cfg in dev: - if usb.util.find_descriptor(cfg, bInterfaceClass=8) is not None: - return True - - -for mass in find(find_all=True, custom_match=is_mass_storage): - print(mass) diff --git a/setup.py b/setup.py index d30afac..c11f64d 100644 --- a/setup.py +++ b/setup.py @@ -4,23 +4,21 @@ from setuptools import setup setup( name='kittengroomer', - version='2.0.2', + version='2.1', author='Raphaël Vinot', author_email='raphael.vinot@circl.lu', maintainer='Raphaël Vinot', url='https://github.com/CIRCL/CIRCLean', description='Standalone CIRCLean/KittenGroomer code.', packages=['kittengroomer'], - scripts=['bin/generic.py', 'bin/pier9.py', 'bin/specific.py', 'bin/filecheck.py'], - include_package_data=True, - package_data={'data': ['PDFA_def.ps', 'srgb.icc']}, - test_suite="tests", + scripts=[ + 'bin/filecheck.py' + ], classifiers=[ 'License :: OSI Approved :: BSD License', 'Development Status :: 5 - Production/Stable', 'Environment :: Console', 'Intended Audience :: Science/Research', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Topic :: Communications :: File Sharing', 'Topic :: Security', diff --git a/tests/logging.py b/tests/logging.py new file mode 100644 index 0000000..e625137 --- /dev/null +++ b/tests/logging.py @@ -0,0 +1,22 @@ +import os 
+ + +def save_logs(groomer, test_description): + divider = ('=' * 10 + '{}' + '=' * 10 + '\n') + test_log_path = 'tests/test_logs/{}.log'.format(test_description) + with open(test_log_path, 'w+') as test_log: + test_log.write(divider.format('TEST LOG')) + with open(groomer.log_processing, 'r') as logfile: + log = logfile.read() + test_log.write(log) + if groomer.debug: + if os.path.exists(groomer.log_debug_err): + test_log.write(divider.format('ERR LOG')) + with open(groomer.log_debug_err, 'r') as debug_err: + err = debug_err.read() + test_log.write(err) + if os.path.exists(groomer.log_debug_out): + test_log.write(divider.format('OUT LOG')) + with open(groomer.log_debug_out, 'r') as debug_out: + out = debug_out.read() + test_log.write(out) diff --git a/tests/oldtests.py b/tests/oldtests.py deleted file mode 100755 index 888d3de..0000000 --- a/tests/oldtests.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import unittest -import os -import sys - -if __name__ == '__main__': - sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) - -from bin.specific import KittenGroomerSpec -from bin.pier9 import KittenGroomerPier9 -from bin.generic import KittenGroomer - -if sys.version_info.major == 2: - from bin.filecheck import KittenGroomerFileCheck - -from kittengroomer import FileBase - - -class TestBasic(unittest.TestCase): - - def setUp(self): - self.maxDiff = None - self.curpath = os.getcwd() - - def dump_logs(self, kg): - print(open(kg.log_processing, 'rb').read()) - if kg.debug: - if os.path.exists(kg.log_debug_err): - print(open(kg.log_debug_err, 'rb').read()) - if os.path.exists(kg.log_debug_out): - print(open(kg.log_debug_out, 'rb').read()) - - def test_specific_valid(self): - src = os.path.join(self.curpath, 'tests/src2') - dst = os.path.join(self.curpath, 'tests/dst') - spec = KittenGroomerSpec(src, dst, debug=True) - spec.processdir() - self.dump_logs(spec) - - def test_specific_invalid(self): - 
src = os.path.join(self.curpath, 'tests/src') - dst = os.path.join(self.curpath, 'tests/dst') - spec = KittenGroomerSpec(src, dst, debug=True) - spec.processdir() - self.dump_logs(spec) - - def test_pier9(self): - src = os.path.join(self.curpath, 'tests/src') - dst = os.path.join(self.curpath, 'tests/dst') - spec = KittenGroomerPier9(src, dst, debug=True) - spec.processdir() - self.dump_logs(spec) - - def test_generic(self): - src = os.path.join(self.curpath, 'tests/src2') - dst = os.path.join(self.curpath, 'tests/dst') - spec = KittenGroomer(src, dst, debug=True) - spec.processdir() - self.dump_logs(spec) - - def test_generic_2(self): - src = os.path.join(self.curpath, 'tests/src') - dst = os.path.join(self.curpath, 'tests/dst') - spec = KittenGroomer(src, dst, debug=True) - spec.processdir() - self.dump_logs(spec) - - def test_filecheck(self): - if sys.version_info.major >= 3: - return - src = os.path.join(self.curpath, 'tests/src') - dst = os.path.join(self.curpath, 'tests/dst') - spec = KittenGroomerFileCheck(src, dst, debug=True) - spec.processdir() - self.dump_logs(spec) - - def test_filecheck_2(self): - if sys.version_info.major >= 3: - return - src = os.path.join(self.curpath, 'tests/src2') - dst = os.path.join(self.curpath, 'tests/dst') - spec = KittenGroomerFileCheck(src, dst, debug=True) - spec.processdir() - self.dump_logs(spec) - - def test_help_file(self): - f = FileBase('tests/src/blah.conf', 'tests/dst/blah.conf') - f.make_unknown() - f.make_binary() - f.make_unknown() - f.make_dangerous() - f.make_binary() - f.make_dangerous() diff --git a/tests/src_complex/42.zip b/tests/src_invalid/42.zip similarity index 100% rename from tests/src_complex/42.zip rename to tests/src_invalid/42.zip diff --git a/tests/src_invalid/autorun.inf b/tests/src_invalid/autorun.inf new file mode 100644 index 0000000..895e1a4 --- /dev/null +++ b/tests/src_invalid/autorun.inf @@ -0,0 +1,4 @@ +[autorun] +open=setup.exe +icon=setup.ico +label=My install CD diff --git 
a/tests/src_complex/blah.conf b/tests/src_invalid/blah.conf similarity index 100% rename from tests/src_complex/blah.conf rename to tests/src_invalid/blah.conf diff --git a/tests/src_complex/blah.tar.bz2 b/tests/src_invalid/blah.tar.bz2 similarity index 100% rename from tests/src_complex/blah.tar.bz2 rename to tests/src_invalid/blah.tar.bz2 diff --git a/tests/src_complex/blah.txt b/tests/src_invalid/blah.txt similarity index 100% rename from tests/src_complex/blah.txt rename to tests/src_invalid/blah.txt diff --git a/tests/src_complex/blah.zip b/tests/src_invalid/blah.zip similarity index 100% rename from tests/src_complex/blah.zip rename to tests/src_invalid/blah.zip diff --git a/tests/src_complex/foobar.dat b/tests/src_invalid/foobar.dat similarity index 100% rename from tests/src_complex/foobar.dat rename to tests/src_invalid/foobar.dat diff --git a/tests/src_complex/geneve_1564.pdf b/tests/src_invalid/geneve_1564.pdf similarity index 100% rename from tests/src_complex/geneve_1564.pdf rename to tests/src_invalid/geneve_1564.pdf diff --git a/tests/src_complex/geneve_1564_wrong_mime.conf b/tests/src_invalid/geneve_1564_wrong_mime.conf similarity index 100% rename from tests/src_complex/geneve_1564_wrong_mime.conf rename to tests/src_invalid/geneve_1564_wrong_mime.conf diff --git a/tests/src_complex/message.msg b/tests/src_invalid/message.msg similarity index 100% rename from tests/src_complex/message.msg rename to tests/src_invalid/message.msg diff --git a/tests/src_complex/ntree.wrl b/tests/src_invalid/ntree.wrl similarity index 100% rename from tests/src_complex/ntree.wrl rename to tests/src_invalid/ntree.wrl diff --git a/tests/src_invalid/test.obj b/tests/src_invalid/test.obj new file mode 100644 index 0000000..907b308 --- /dev/null +++ b/tests/src_invalid/test.obj @@ -0,0 +1 @@ +blah diff --git a/tests/src_simple/blah.conf b/tests/src_valid/blah.conf similarity index 100% rename from tests/src_simple/blah.conf rename to tests/src_valid/blah.conf diff --git 
a/tests/test_examples.py b/tests/test_examples.py deleted file mode 100644 index 79d9efb..0000000 --- a/tests/test_examples.py +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import os -import sys - -import pytest - -from bin.specific import KittenGroomerSpec -from bin.pier9 import KittenGroomerPier9 -from bin.generic import KittenGroomer - -if sys.version_info.major == 2: - from bin.filecheck import KittenGroomerFileCheck - - -skip = pytest.mark.skip -py2_only = pytest.mark.skipif(sys.version_info.major == 3, - reason="filecheck.py only runs on python 2") - - -@pytest.fixture -def src_simple(): - return os.path.join(os.getcwd(), 'tests/src_simple') - - -@pytest.fixture -def src_complex(): - return os.path.join(os.getcwd(), 'tests/src_complex') - - -@pytest.fixture -def dst(): - return os.path.join(os.getcwd(), 'tests/dst') - - -def test_specific_valid(src_simple, dst): - spec = KittenGroomerSpec(src_simple, dst, debug=True) - spec.processdir() - dump_logs(spec) - - -def test_specific_invalid(src_complex, dst): - spec = KittenGroomerSpec(src_complex, dst, debug=True) - spec.processdir() - dump_logs(spec) - - -def test_pier9(src_complex, dst): - spec = KittenGroomerPier9(src_complex, dst, debug=True) - spec.processdir() - dump_logs(spec) - - -def test_generic(src_simple, dst): - spec = KittenGroomer(src_simple, dst, debug=True) - spec.processdir() - dump_logs(spec) - - -def test_generic_2(src_complex, dst): - spec = KittenGroomer(src_complex, dst, debug=True) - spec.processdir() - dump_logs(spec) - - -@py2_only -def test_filecheck(src_complex, dst): - spec = KittenGroomerFileCheck(src_complex, dst, debug=True) - spec.processdir() - dump_logs(spec) - - -@py2_only -def test_filecheck_2(src_simple, dst): - spec = KittenGroomerFileCheck(src_simple, dst, debug=True) - spec.processdir() - dump_logs(spec) - -## Helper functions - -def dump_logs(spec): - print(open(spec.log_processing, 'rb').read()) - if spec.debug: - if 
os.path.exists(spec.log_debug_err): - print(open(spec.log_debug_err, 'rb').read()) - if os.path.exists(spec.log_debug_out): - print(open(spec.log_debug_out, 'rb').read()) diff --git a/tests/test_filecheck.py b/tests/test_filecheck.py new file mode 100644 index 0000000..ac7cf42 --- /dev/null +++ b/tests/test_filecheck.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os + +import pytest + +from tests.logging import save_logs +try: + from bin.filecheck import KittenGroomerFileCheck, File, main + NODEPS = False +except ImportError: + NODEPS = True + +skipif_nodeps = pytest.mark.skipif(NODEPS, + reason="Dependencies aren't installed") + + +@skipif_nodeps +class TestIntegration: + + @pytest.fixture + def src_valid(self): + return os.path.join(os.getcwd(), 'tests/src_valid') + + @pytest.fixture + def src_invalid(self): + return os.path.join(os.getcwd(), 'tests/src_invalid') + + @pytest.fixture + def dst(self): + return os.path.join(os.getcwd(), 'tests/dst') + + def test_filecheck(self, src_invalid, dst): + groomer = KittenGroomerFileCheck(src_invalid, dst, debug=True) + groomer.processdir() + test_description = "filecheck_invalid" + save_logs(groomer, test_description) + + def test_filecheck_2(self, src_valid, dst): + groomer = KittenGroomerFileCheck(src_valid, dst, debug=True) + groomer.processdir() + test_description = "filecheck_valid" + save_logs(groomer, test_description) + + +class TestFileHandling: + pass diff --git a/tests/test_integration.py b/tests/test_integration.py deleted file mode 100644 index 72adafb..0000000 --- a/tests/test_integration.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import os - -import kittengroomer as kg -import bin.specific as specific - -PATH = os.getcwd() + '/tests/' - - -def test_base(): - assert kg.FileBase - assert kg.KittenGroomerBase - assert kg.main - - -def test_help_file(): - f = kg.FileBase('tests/src_complex/blah.conf', 'tests/dst/blah.conf') - 
f.make_unknown() - f.make_binary() - f.make_unknown() - f.make_dangerous() - f.make_binary() - f.make_dangerous() diff --git a/tests/test_helpers.py b/tests/test_kittengroomer.py similarity index 93% rename from tests/test_helpers.py rename to tests/test_kittengroomer.py index a14510a..9698a95 100644 --- a/tests/test_helpers.py +++ b/tests/test_kittengroomer.py @@ -2,14 +2,12 @@ # -*- coding: utf-8 -*- import os -import sys import pytest from kittengroomer import FileBase, KittenGroomerBase from kittengroomer.helpers import ImplementationRequired -PY3 = sys.version_info.major == 3 skip = pytest.mark.skip xfail = pytest.mark.xfail fixture = pytest.fixture @@ -21,7 +19,7 @@ class TestFileBase: @fixture def source_file(self): - return 'tests/src_simple/blah.conf' + return 'tests/src_valid/blah.conf' @fixture def dest_file(self): @@ -84,23 +82,15 @@ class TestFileBase: # We should probably catch everytime that happens and tell the user explicitly happened (and maybe put it in the log) def test_create(self): - file = FileBase('tests/src_simple/blah.conf', '/tests/dst/blah.conf') + file = FileBase('tests/src_valid/blah.conf', '/tests/dst/blah.conf') def test_create_broken(self, tmpdir): with pytest.raises(TypeError): file_no_args = FileBase() - if PY3: - with pytest.raises(FileNotFoundError): - file_empty_args = FileBase('', '') - else: - with pytest.raises(IOError): - file_empty_args = FileBase('', '') - if PY3: - with pytest.raises(IsADirectoryError): - file_directory = FileBase(tmpdir.strpath, tmpdir.strpath) - else: - with pytest.raises(IOError): - file_directory = FileBase(tmpdir.strpath, tmpdir.strpath) + with pytest.raises(FileNotFoundError): + file_empty_args = FileBase('', '') + with pytest.raises(IsADirectoryError): + file_directory = FileBase(tmpdir.strpath, tmpdir.strpath) # are there other cases here? path to a file that doesn't exist? permissions? 
def test_init(self, generic_conf_file): @@ -113,6 +103,13 @@ class TestFileBase: # assert file.log_details == copied_log # this fails for now, we need to make log_details undeletable # we should probably check for more extensions here + def test_extension_uppercase(self, tmpdir): + file_path = tmpdir.join('TEST.TXT') + file_path.write('testing') + file_path = file_path.strpath + file = FileBase(file_path, file_path) + assert file.extension == '.txt' + def test_mimetypes(self, generic_conf_file): assert generic_conf_file.has_mimetype() assert generic_conf_file.mimetype == 'text/plain' @@ -221,7 +218,7 @@ class TestKittenGroomerBase: @fixture def source_directory(self): - return 'tests/src_complex' + return 'tests/src_invalid' @fixture def dest_directory(self): diff --git a/tests/test_logs/.keepdir b/tests/test_logs/.keepdir new file mode 100644 index 0000000..e69de29 diff --git a/tox.ini b/tox.ini index da1ed31..0215047 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist=py27,py35 +envlist=py35 [testenv] deps=-rdev-requirements.txt -commands= pytest tests/test_helpers.py --cov=kittengroomer +commands= pytest --cov=kittengroomer --cov=bin