diff --git a/.gitignore b/.gitignore index 3f07002..4680d14 100644 --- a/.gitignore +++ b/.gitignore @@ -71,8 +71,8 @@ tests/dst/* tests/*_dst tests/test_logs/* !tests/**/.keepdir -!tests/src_invalid/* -!tests/src_valid/* +!tests/dangerous/* +!tests/normal/* pdfid.py # Plugins are pdfid stuff plugin_* diff --git a/.travis.yml b/.travis.yml index 0cf3beb..54a7025 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ python: - nightly sudo: required -# do we need sudo? should double check +# https://docs.travis-ci.com/user/ci-environment/#Virtualization-environments dist: trusty @@ -36,7 +36,7 @@ install: - pip install git+https://github.com/grierforensics/officedissector.git # PyCIRCLean dependencies - pip install -r dev-requirements.txt - - pip install coveralls codecov + - pip install codecov # Testing dependencies - sudo apt-get install rar # Prepare tests @@ -45,7 +45,7 @@ install: - pushd theZoo/malwares/Binaries - python unpackall.py - popd - - mv theZoo/malwares/Binaries/out tests/src_invalid/ + - mv theZoo/malwares/Binaries/out tests/uncategorized/ # Path traversal attacks - git clone https://github.com/jwilk/path-traversal-samples - pushd path-traversal-samples @@ -56,25 +56,23 @@ install: - make - popd - popd - - mv path-traversal-samples/zip/*.zip tests/src_invalid/ - - mv path-traversal-samples/rar/*.rar tests/src_invalid/ + - mv path-traversal-samples/zip/*.zip tests/uncategorized/ + - mv path-traversal-samples/rar/*.rar tests/uncategorized/ # Office docs - git clone https://github.com/eea/odfpy.git - - mv odfpy/tests/examples/* tests/src_invalid/ - - pushd tests/src_invalid/ + - mv odfpy/tests/examples/* tests/uncategorized/ + - pushd tests/uncategorized/ - wget https://bitbucket.org/decalage/olefileio_pl/raw/3073963b640935134ed0da34906fea8e506460be/Tests/images/test-ole-file.doc - wget --no-check-certificate https://www.officedissector.com/corpus/fraunhoferlibrary.zip - unzip -o fraunhoferlibrary.zip - rm fraunhoferlibrary.zip - - 7z x -p42 
42.zip - # Some random samples - # - wget http://www.sample-videos.com/audio/mp3/india-national-anthem.mp3 - # - wget http://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4 - - wget http://thewalter.net/stef/software/rtfx/sample.rtf - popd + # - pushd tests/dangerous/ + # - 7z x -p42 42.zip + # - popd script: - - travis_wait 30 py.test --cov=kittengroomer --cov=bin tests/ + - travis_wait py.test --cov=kittengroomer --cov=bin tests/ notifications: email: @@ -82,5 +80,4 @@ notifications: on_failure: change after_success: - - coveralls - codecov diff --git a/bin/filecheck.py b/bin/filecheck.py index 536f4e1..540f043 100644 --- a/bin/filecheck.py +++ b/bin/filecheck.py @@ -14,8 +14,6 @@ import officedissector import warnings import exifread from PIL import Image -# TODO: why do we have this import? How does filecheck handle pngs? -# from PIL import PngImagePlugin from pdfid import PDFiD, cPDFiD from kittengroomer import FileBase, KittenGroomerBase, Logging @@ -119,14 +117,20 @@ class Config: class File(FileBase): + """ + Main file object + + Created for each file that is processed by KittenGroomer. Contains all + filetype-specific processing methods. 
+ """ def __init__(self, src_path, dst_path, logger): super(File, self).__init__(src_path, dst_path) - self.is_recursive = False + self.is_archive = False self.logger = logger self.tempdir_path = self.dst_path + '_temp' - subtypes_apps = [ + subtypes_apps = ( (Config.mimes_office, self._winoffice), (Config.mimes_ooxml, self._ooxml), (Config.mimes_rtf, self.text), @@ -136,13 +140,13 @@ class File(FileBase): (Config.mimes_ms, self._executables), (Config.mimes_compressed, self._archive), (Config.mimes_data, self._binary_app), - ] + ) self.app_subtype_methods = self._make_method_dict(subtypes_apps) - types_metadata = [ + types_metadata = ( (Config.mimes_exif, self._metadata_exif), (Config.mimes_png, self._metadata_png), - ] + ) self.metadata_mimetype_methods = self._make_method_dict(types_metadata) self.mime_processing_options = { @@ -204,19 +208,34 @@ class File(FileBase): self.make_dangerous('Extension does not match expected extensions for this mimetype') def _check_filename(self): - if self.filename[0] is '.': - # TODO: handle dotfiles here - pass + """ + Verify the filename + + If the filename contains any dangerous or specific characters, handle + them appropriately. + """ + if self.filename.startswith('.'): + macos_hidden_files = set( + '.Trashes', '._.Trashes', '.DS_Store', '.fseventsd', '.Spotlight-V100' + ) + if self.filename in macos_hidden_files: + self.add_description('MacOS hidden metadata file.') + self.should_copy = False right_to_left_override = u"\u202E" if right_to_left_override in self.filename: self.make_dangerous('Filename contains dangerous character') - self.dst_path = self.dst_path.replace(right_to_left_override, '') - # TODO: change self.filename and'filename' property? 
Or should those reflect the values on the source key + new_filename = self.filename.replace(right_to_left_override, '') + self.set_property('filename', new_filename) def check(self): - if self.main_type in Config.ignored_mimes: + """ + Main file processing method + + Delegates to various helper methods including filetype-specific checks. + """ + if self.maintype in Config.ignored_mimes: self.should_copy = False - self.mime_processing_options.get(self.main_type, self.unknown)() + self.mime_processing_options.get(self.maintype, self.unknown)() else: self._check_dangerous() self._check_filename() @@ -225,13 +244,14 @@ class File(FileBase): if self.has_mimetype: self._check_mimetype() if not self.is_dangerous: - self.mime_processing_options.get(self.main_type, self.unknown)() + self.mime_processing_options.get(self.maintype, self.unknown)() def write_log(self): + """Pass information about the file to self.logger""" props = self.get_all_props() - if not self.is_recursive: + if not self.is_archive: if os.path.exists(self.tempdir_path): - # Hack to make images appear at the correct tree depth in log + # FIXME: Hack to make images appear at the correct tree depth in log self.logger.add_file(self.src_path, props, in_tempdir=True) return self.logger.add_file(self.src_path, props) @@ -293,13 +313,13 @@ class File(FileBase): def text(self): """Process an rtf, ooxml, or plaintext file.""" for mt in Config.mimes_rtf: - if mt in self.sub_type: + if mt in self.subtype: self.add_description('Rich Text (rtf) file') # TODO: need a way to convert it to plain text self.force_ext('.txt') return for mt in Config.mimes_ooxml: - if mt in self.sub_type: + if mt in self.subtype: self.add_description('OOXML (openoffice) file') self._ooxml() return @@ -308,13 +328,12 @@ class File(FileBase): def application(self): """Process an application specific file according to its subtype.""" - for subtype, method in self.app_subtype_methods.items(): - if subtype in self.sub_type: - # TODO: should we 
change the logic so we don't iterate through all of the subtype methods? - # TODO: should these methods return a value? - method() - return - self._unknown_app() + if self.subtype in self.app_subtype_methods: + method = self.app_subtype_methods[self.subtype] + method() + # TODO: should these application methods return a value? + else: + self._unknown_app() def _executables(self): """Process an executable file.""" @@ -346,9 +365,7 @@ class File(FileBase): self.make_dangerous('WinOffice file containing a macro') for i in indicators: if i.id == 'ObjectPool' and i.value: - # TODO: is having an ObjectPool suspicious? - # LOG: user defined property - self.add_description('WinOffice file containing an object pool') + self.make_dangerous('WinOffice file containing an object pool') elif i.id == 'flash' and i.value: self.make_dangerous('WinOffice file with embedded flash') self.add_description('WinOffice file') @@ -372,7 +389,7 @@ class File(FileBase): if len(doc.features.embedded_packages) > 0: self.make_dangerous('Ooxml file with embedded packages') if not self.is_dangerous: - self.add_description('OOXML file') + self.add_description('Ooxml file') def _libreoffice(self): """Process a libreoffice file.""" @@ -423,17 +440,15 @@ class File(FileBase): # TODO: change this to something archive type specific instead of generic 'Archive' self.add_description('Archive') self.should_copy = False - self.is_recursive = True + self.is_archive = True def _unknown_app(self): """Process an unknown file.""" - self.add_description('Unknown application file') - self.make_unknown() + self.make_dangerous('Unknown application file') def _binary_app(self): """Process an unknown binary file.""" - self.add_description('Unknown binary file') - self.make_binary() + self.make_dangerous('Unknown binary file') ####################### # Metadata extractors @@ -557,7 +572,7 @@ class GroomerLogger(object): self.log_debug_out = os.devnull def _make_log_dir(self, root_dir_path): - """Make the directory in 
the dest dir that will hold the logs""" + """Create the directory in the dest dir that will hold the logs""" log_dir_path = os.path.join(root_dir_path, 'logs') if os.path.exists(log_dir_path): shutil.rmtree(log_dir_path) @@ -565,6 +580,7 @@ class GroomerLogger(object): return log_dir_path def _add_root_dir(self, root_path): + """Add the root directory to the log""" dirname = os.path.split(root_path)[1] + '/' with open(self.log_path, mode='ab') as lf: lf.write(bytes(dirname, 'utf-8')) @@ -572,40 +588,51 @@ class GroomerLogger(object): def add_file(self, file_path, file_props, in_tempdir=False): """Add a file to the log. Takes a dict of file properties.""" - # TODO: fix var names in this method - # TODO: handle symlinks better: symlink_string = '{}+-- {}\t- Symbolic link to {}\n'.format(padding, f, os.readlink(curpath)) - props = file_props depth = self._get_path_depth(file_path) - description_string = ', '.join(props['description_string']) + description_string = ', '.join(file_props['description_string']) file_hash = Logging.computehash(file_path)[:6] - if props['safety_category'] is None: - descr_cat = "Normal" + if file_props['is_dangerous']: + description_category = "Dangerous" else: - descr_cat = props['safety_category'].capitalize() - # TODO: make size adjust to MB/GB for large files - size = str(props['file_size']) + 'B' - file_template = "+- {name} ({sha_hash}): {size}, {mt}/{st}. {desc}: {desc_str}" + description_category = "Normal" + size_string = self._format_file_size(file_props['file_size']) + file_template = "+- {name} ({sha_hash}): {size}, type: {mt}/{st}. 
{desc}: {desc_str}" file_string = file_template.format( - name=props['filename'], + name=file_props['filename'], sha_hash=file_hash, - size=size, - mt=props['maintype'], - st=props['subtype'], - desc=descr_cat, + size=size_string, + mt=file_props['maintype'], + st=file_props['subtype'], + desc=description_category, desc_str=description_string, - # errs='' # TODO: add errors in human readable form here ) + # TODO: finish adding Errors and check that they appear properly + # if file_props['errors']: + # error_string = ', '.join([str(key) for key in file_props['errors']]) + # file_string.append(' Errors: ' + error_string) if in_tempdir: depth -= 1 self._write_line_to_log(file_string, depth) def add_dir(self, dir_path): + """Add a directory to the log""" path_depth = self._get_path_depth(dir_path) dirname = os.path.split(dir_path)[1] + '/' log_line = '+- ' + dirname self._write_line_to_log(log_line, path_depth) + def _format_file_size(self, size): + """Returns a string with the file size and appropriate unit""" + file_size = size + for unit in ('B', 'KB', 'MB', 'GB'): + if file_size < 1024: + return str(int(file_size)) + unit + else: + file_size = file_size / 1024 + return str(int(file_size)) + 'GB' + def _get_path_depth(self, path): + """Returns the relative path depth compared to root directory""" if self._dst_root_path in path: base_path = self._dst_root_path elif self._src_root_path in path: @@ -615,6 +642,11 @@ class GroomerLogger(object): return path_depth def _write_line_to_log(self, line, indentation_depth): + """ + Write a line to the log + + Pad the line according to the `indentation_depth`. 
+ """ padding = b' ' padding += b'| ' * indentation_depth line_bytes = os.fsencode(line) @@ -630,18 +662,17 @@ class KittenGroomerFileCheck(KittenGroomerBase): super(KittenGroomerFileCheck, self).__init__(root_src, root_dst) self.recursive_archive_depth = 0 self.max_recursive_depth = max_recursive_depth - self.cur_file = None self.logger = GroomerLogger(root_src, root_dst, debug) def process_dir(self, src_dir, dst_dir): """Process a directory on the source key.""" for srcpath in self.list_files_dirs(src_dir): - if os.path.isdir(srcpath): + if not os.path.islink(srcpath) and os.path.isdir(srcpath): self.logger.add_dir(srcpath) else: dstpath = os.path.join(dst_dir, os.path.basename(srcpath)) - self.cur_file = File(srcpath, dstpath, self.logger) - self.process_file(self.cur_file) + cur_file = File(srcpath, dstpath, self.logger) + self.process_file(cur_file) def process_file(self, file): """ @@ -654,8 +685,8 @@ class KittenGroomerFileCheck(KittenGroomerBase): if file.should_copy: file.safe_copy() file.set_property('copied', True) - file.write_log() - if file.is_recursive: + file.write_log() + if file.is_archive: self.process_archive(file) # TODO: Can probably handle cleaning up the tempdir better if hasattr(file, 'tempdir_path'): @@ -673,8 +704,6 @@ class KittenGroomerFileCheck(KittenGroomerBase): file.make_dangerous('Archive bomb') else: tempdir_path = file.make_tempdir() - # TODO: double check we are properly escaping file.src_path - # otherwise we are running unsanitized user input directly in the shell command_str = '{} -p1 x "{}" -o"{}" -bd -aoa' unpack_command = command_str.format(SEVENZ_PATH, file.src_path, tempdir_path) @@ -695,12 +724,20 @@ class KittenGroomerFileCheck(KittenGroomerBase): return True def list_files_dirs(self, root_dir_path): + """ + Returns a list of all files and directories + + Performs a depth-first traversal of the file tree. 
+ """ queue = [] for path in sorted(os.listdir(root_dir_path), key=lambda x: str.lower(x)): full_path = os.path.join(root_dir_path, path) - if os.path.isdir(full_path): + # check for symlinks first to prevent getting trapped in infinite symlink recursion + if os.path.islink(full_path): queue.append(full_path) - queue += self.list_files_dirs(full_path) # if path is a dir, recurse through its contents + elif os.path.isdir(full_path): + queue.append(full_path) + queue += self.list_files_dirs(full_path) elif os.path.isfile(full_path): queue.append(full_path) return queue diff --git a/dev-requirements.txt b/dev-requirements.txt index 8d2f7f4..aa0fdd9 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,3 +1,4 @@ python-magic pytest pytest-cov +PyYAML diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py index 998015c..a31abce 100644 --- a/kittengroomer/helpers.py +++ b/kittengroomer/helpers.py @@ -16,19 +16,6 @@ import argparse import magic -class KittenGroomerError(Exception): - """Base KittenGroomer exception handler.""" - - def __init__(self, message): - super(KittenGroomerError, self).__init__(message) - self.message = message - - -class ImplementationRequired(KittenGroomerError): - """Implementation required error.""" - pass - - class FileBase(object): """ Base object for individual files in the source directory. @@ -43,82 +30,44 @@ class FileBase(object): Create various properties and determine the file's mimetype. 
""" self.src_path = src_path - self.dst_path = dst_path - self.filename = os.path.basename(self.src_path) - self._file_props = { - 'filepath': self.src_path, - 'filename': self.filename, - 'file_size': self.size, - 'maintype': None, - 'subtype': None, - 'extension': None, - 'safety_category': None, - 'symlink': False, - 'copied': False, - 'description_string': [], # array of descriptions to be joined - 'errors': {}, - 'user_defined': {} - } - self.extension = self._determine_extension() - self.set_property('extension', self.extension) - self.mimetype = self._determine_mimetype() + self.dst_dir = os.path.dirname(dst_path) + self.filename = os.path.basename(src_path) + self.size = self._get_size(src_path) + self.is_dangerous = False + self.copied = False + self.symlink_path = None + self.description_string = [] # array of descriptions to be joined + self._errors = {} + self._user_defined = {} self.should_copy = True - self.main_type = None - self.sub_type = None - if self.mimetype: - self.main_type, self.sub_type = self._split_subtypes(self.mimetype) - if self.main_type: - self.set_property('maintype', self.main_type) - if self.sub_type: - self.set_property('subtype', self.sub_type) - - def _determine_extension(self): - _, ext = os.path.splitext(self.src_path) - ext = ext.lower() - if ext == '': - ext = None - return ext - - def _determine_mimetype(self): - if os.path.islink(self.src_path): - # magic will throw an IOError on a broken symlink - mimetype = 'inode/symlink' - self.set_property('symlink', os.readlink(self.src_path)) - else: - try: - mt = magic.from_file(self.src_path, mime=True) - # Note: libmagic will always return something, even if it's just 'data' - except UnicodeEncodeError as e: - # FIXME: The encoding of the file that triggers this is broken (possibly it's UTF-16 and Python expects utf8) - # Note: one of the Travis files will trigger this exception - self.add_error(e, '') - mt = None - try: - mimetype = mt.decode("utf-8") - except: - mimetype = mt 
- return mimetype - - def _split_subtypes(self, mimetype): - if '/' in mimetype: - main_type, sub_type = mimetype.split('/') - else: - main_type, sub_type = None, None - return main_type, sub_type + self.mimetype = self._determine_mimetype(src_path) @property - def size(self): - """Filesize in bytes as an int, 0 if file does not exist.""" - try: - size = os.path.getsize(self.src_path) - except FileNotFoundError: - size = 0 - return size + def dst_path(self): + return os.path.join(self.dst_dir, self.filename) + + @property + def extension(self): + _, ext = os.path.splitext(self.filename) + if ext == '': + return None + else: + return ext.lower() + + @property + def maintype(self): + main, _ = self._split_mimetype(self.mimetype) + return main + + @property + def subtype(self): + _, sub = self._split_mimetype(self.mimetype) + return sub @property def has_mimetype(self): """True if file has a main and sub mimetype, else False.""" - if not self.main_type or not self.sub_type: + if not self.maintype or not self.subtype: return False else: return True @@ -131,43 +80,41 @@ class FileBase(object): else: return True - @property - def is_dangerous(self): - """True if file has been marked 'dangerous', else False.""" - return self._file_props['safety_category'] is 'dangerous' - - @property - def is_unknown(self): - """True if file has been marked 'unknown', else False.""" - return self._file_props['safety_category'] is 'unknown' - - @property - def is_binary(self): - """True if file has been marked 'binary', else False.""" - return self._file_props['safety_category'] is 'binary' - @property def is_symlink(self): - """True if file is a symlink, else False.""" - if self._file_props['symlink'] is False: + """True if file is a symlink, else False.""" + if self.symlink_path is None: return False else: return True + @property + def description_string(self): + return self.__description_string + + @description_string.setter + def description_string(self, value): + if hasattr(self, 
'description_string'): + if isinstance(value, str): + if value not in self.__description_string: + self.__description_string.append(value) + else: + raise TypeError("Description_string can only include strings") + else: + self.__description_string = value + def set_property(self, prop_string, value): """ - Take a property and a value and add them to the file's property dict. + Take a property and a value and add them to the file's stored props. If `prop_string` is part of the file property API, set it to `value`. Otherwise, add `prop_string`: `value` to `user_defined` properties. + TODO: rewrite docstring """ - if prop_string is 'description_string': - if value not in self._file_props['description_string']: - self._file_props['description_string'].append(value) - elif prop_string in self._file_props.keys(): - self._file_props[prop_string] = value + if hasattr(self, prop_string): + setattr(self, prop_string, value) else: - self._file_props['user_defined'][prop_string] = value + self._user_defined[prop_string] = value def get_property(self, prop_string): """ @@ -175,20 +122,34 @@ class FileBase(object): Returns `None` if `prop_string` cannot be found on the file. """ - if prop_string in self._file_props: - return self._file_props[prop_string] - elif prop_string in self._file_props['user_defined']: - return self._file_props['user_defined'][prop_string] - else: - return None + try: + return getattr(self, prop_string) + except AttributeError: + return self._user_defined.get(prop_string, None) def get_all_props(self): """Return a dict containing all stored properties of this file.""" - return self._file_props + # Maybe move this onto the logger? 
I think that makes more sense + props_dict = { + 'filepath': self.src_path, + 'filename': self.filename, + 'file_size': self.size, + 'mimetype': self.mimetype, + 'maintype': self.maintype, + 'subtype': self.subtype, + 'extension': self.extension, + 'is_dangerous': self.is_dangerous, + 'symlink_path': self.symlink_path, + 'copied': self.copied, + 'description_string': self.description_string, + 'errors': self._errors, + 'user_defined': self._user_defined + } + return props_dict def add_error(self, error, info_string): """Add an `error`: `info_string` pair to the file.""" - self._file_props['errors'].update({error: info_string}) + self._errors.update({error: info_string}) def add_description(self, description_string): """ @@ -205,29 +166,11 @@ class FileBase(object): Prepend and append DANGEROUS to the destination file name to help prevent double-click of death. """ - if self.is_dangerous: - self.set_property('description_string', reason_string) - return - self.set_property('safety_category', 'dangerous') - self.set_property('description_string', reason_string) - path, filename = os.path.split(self.dst_path) - self.dst_path = os.path.join(path, 'DANGEROUS_{}_DANGEROUS'.format(filename)) - - def make_unknown(self): - """Mark file as an unknown type and prepend UNKNOWN to filename.""" - if self.is_dangerous or self.is_binary: - return - self.set_property('safety_category', 'unknown') - path, filename = os.path.split(self.dst_path) - self.dst_path = os.path.join(path, 'UNKNOWN_{}'.format(filename)) - - def make_binary(self): - """Mark file as a binary and append .bin to filename.""" - if self.is_dangerous: - return - self.set_property('safety_category', 'binary') - path, filename = os.path.split(self.dst_path) - self.dst_path = os.path.join(path, '{}.bin'.format(filename)) + if not self.is_dangerous: + self.set_property('is_dangerous', True) + self.filename = 'DANGEROUS_{}_DANGEROUS'.format(self.filename) + if reason_string: + self.add_description(reason_string) def 
safe_copy(self, src=None, dst=None): """Copy file and create destination directories if needed.""" @@ -236,52 +179,89 @@ class FileBase(object): if dst is None: dst = self.dst_path try: - dst_path, filename = os.path.split(dst) - if not os.path.exists(dst_path): - os.makedirs(dst_path) + os.makedirs(self.dst_dir, exist_ok=True) shutil.copy(src, dst) - except Exception as e: + except IOError as e: + # Probably means we can't write in the dest dir self.add_error(e, '') - def force_ext(self, ext): - """If dst_path does not end in ext, append .ext to it.""" - ext = self._check_leading_dot(ext) - if not self.dst_path.endswith(ext): - # LOG: do we want to log that the extension was changed as below? - # self.set_property('force_ext', True) - self.dst_path += ext - if not self._file_props['extension'] == ext: - self.set_property('extension', ext) + def force_ext(self, extension): + """If dst_path does not end in `extension`, append .ext to it.""" + new_ext = self._check_leading_dot(extension) + if not self.filename.endswith(new_ext): + # TODO: log that the extension was changed + self.filename += new_ext + if not self.get_property('extension') == new_ext: + self.set_property('extension', new_ext) - def create_metadata_file(self, ext): + def create_metadata_file(self, extension): + # TODO: this method name is confusing """ Create a separate file to hold extracted metadata. - The string `ext` will be used as the extension for the metadata file. + The string `extension` will be used as the extension for the file. 
""" - ext = self._check_leading_dot(ext) + ext = self._check_leading_dot(extension) try: + # Prevent using the same path as another file from src_path if os.path.exists(self.src_path + ext): - err_str = ("Could not create metadata file for \"" + - self.filename + - "\": a file with that path already exists.") - raise KittenGroomerError(err_str) + raise KittenGroomerError( + "Could not create metadata file for \"" + + self.filename + + "\": a file with that path exists.") else: - dst_dir_path, filename = os.path.split(self.dst_path) - if not os.path.exists(dst_dir_path): - os.makedirs(dst_dir_path) + os.makedirs(self.dst_dir, exist_ok=True) + # TODO: shouldn't mutate state and also return something self.metadata_file_path = self.dst_path + ext return self.metadata_file_path + # TODO: can probably let this exception bubble up except KittenGroomerError as e: self.add_error(e, '') return False def _check_leading_dot(self, ext): + # TODO: this method name is confusing if len(ext) > 0: if not ext.startswith('.'): return '.' + ext return ext + def _determine_mimetype(self, file_path): + if os.path.islink(file_path): + # libmagic will throw an IOError on a broken symlink + mimetype = 'inode/symlink' + self.set_property('symlink_path', os.readlink(file_path)) + else: + try: + mt = magic.from_file(file_path, mime=True) + # libmagic will always return something, even if it's just 'data' + except UnicodeEncodeError as e: + # FIXME: The encoding of the file that triggers this is broken (possibly it's UTF-16 and Python expects utf8) + # Note: one of the Travis files will trigger this exception + self.add_error(e, '') + mt = None + try: + mimetype = mt.decode("utf-8") + except: + # FIXME: what should the exception be here if mimetype isn't utf-8? 
+ mimetype = mt + return mimetype + + def _split_mimetype(self, mimetype): + if mimetype and '/' in mimetype: + main_type, sub_type = mimetype.split('/') + else: + main_type, sub_type = None, None + return main_type, sub_type + + def _get_size(self, file_path): + """Filesize in bytes as an int, 0 if file does not exist.""" + try: + size = os.path.getsize(file_path) + except FileNotFoundError: + size = 0 + return size + class Logging(object): @@ -331,13 +311,24 @@ class KittenGroomerBase(object): ####################### - # TODO: if we move main() we can get rid of this as well def processdir(self, src_dir, dst_dir): """Implement this function to define file processing behavior.""" raise ImplementationRequired('Please implement processdir.') -# TODO: Should this get moved to filecheck? It isn't really API code and somebody can implement it themselves +class KittenGroomerError(Exception): + """Base KittenGroomer exception handler.""" + + def __init__(self, message): + super(KittenGroomerError, self).__init__(message) + self.message = message + + +class ImplementationRequired(KittenGroomerError): + """Implementation required error.""" + pass + + def main(kg_implementation, description='Call a KittenGroomer implementation to process files present in the source directory and copy them to the destination directory.'): parser = argparse.ArgumentParser(prog='KittenGroomer', description=description) parser.add_argument('-s', '--source', type=str, help='Source directory') diff --git a/tests/dangerous/archivebomb.zip b/tests/dangerous/archivebomb.zip new file mode 100644 index 0000000..c3bc879 Binary files /dev/null and b/tests/dangerous/archivebomb.zip differ diff --git a/tests/src_invalid/autorun.inf b/tests/dangerous/autorun.inf similarity index 100% rename from tests/src_invalid/autorun.inf rename to tests/dangerous/autorun.inf diff --git a/tests/src_invalid/blah.conf b/tests/dangerous/config_file.conf similarity index 100% rename from tests/src_invalid/blah.conf rename 
to tests/dangerous/config_file.conf diff --git a/tests/src_invalid/message.msg b/tests/dangerous/message.msg similarity index 100% rename from tests/src_invalid/message.msg rename to tests/dangerous/message.msg diff --git a/tests/src_invalid/testRTL‮exe.txt b/tests/dangerous/testRTL‮exe.txt similarity index 100% rename from tests/src_invalid/testRTL‮exe.txt rename to tests/dangerous/testRTL‮exe.txt diff --git a/tests/file_catalog.yaml b/tests/file_catalog.yaml new file mode 100644 index 0000000..13a6ab9 --- /dev/null +++ b/tests/file_catalog.yaml @@ -0,0 +1,56 @@ +#YAML +# Possible fields: + # description: + # mimetype: + # xfail: + +normal: + Example.gif: # Added: 27-06-2017, source: https://en.wikipedia.org/wiki/File:Example.gif + mimetype: image/gif + Example.jpg: # Added: 27-06-2017, source: https://en.wikipedia.org/wiki/File:Example.jpg + mimetype: image/jpeg + Example.ogg: # Added: 27-06-2017, source: https://en.wikipedia.org/wiki/File:Example.ogg + description: Ogg vorbis sound file + mimetype: audio/ogg + xfail: True + Example.png: # Added: 27-06-2017, source: https://en.wikipedia.org/wiki/File:Example.png + mimetype: image/png + Example.svg: # Added: 27-06-2017, source: https://en.wikipedia.org/wiki/File:Example.svg + mimetype: image/svg+xml + xfail: True + pdf-sample.pdf: # Added: 27-06-2017, source: http://che.org.il/wp-content/uploads/2016/12/pdf-sample.pdf + mimetype: application/pdf + plaintext.txt: # Added: 27-06-2017, source: hand-generated + mimetype: text/plain + rar_archive.rar: # Added: 27-06-2017, Rar archive. 
Source: hand-generated + description: rar archive + mimetype: application/x-rar + xfail: True + rich_text.rtf: # Added 27-06-2017, source: hand-generated + mimetype: text/rtf + sample_mpeg4.mp4: # Added 28-06-2017, source: https://support.apple.com/en-us/HT201549 + mimetype: video/mp4 + zip_archive.zip: # Added 27-06-2017, source: hand-generated + mimetype: application/zip + +dangerous: + # 42.zip: # Added 27-06-2017, source: http://www.unforgettable.dk/42.zip + # description: zip archivebomb, password is '42' + # mimetype: application/zip + # xfail: True + archivebomb.zip: # Added 16-07-2017, source: hand-generated + description: zip archivebomb with 3 levels + mimetype: application/zip + xfail: True + autorun.inf: # Added 27-06-2017, source: hand-generated + description: Microsoft autorun file + mimetype: text/plain + config_file.conf: # Added 27-06-2017, source: hand-generated + description: config file + mimetype: text/plain + message.msg: # Added 27-06-2017, source: ???? + description: message file, used by Outlook etc + mimetype: message/rfc822 + testRTL‮exe.txt: # Added 27-06-2017, source: hand-generated + description: text file with right-to-left character in filename + mimetype: text/plain diff --git a/tests/normal/Example.gif b/tests/normal/Example.gif new file mode 100644 index 0000000..2b65920 Binary files /dev/null and b/tests/normal/Example.gif differ diff --git a/tests/src_valid/Example.jpg b/tests/normal/Example.jpg similarity index 100% rename from tests/src_valid/Example.jpg rename to tests/normal/Example.jpg diff --git a/tests/normal/Example.ogg b/tests/normal/Example.ogg new file mode 100644 index 0000000..0d7f43e Binary files /dev/null and b/tests/normal/Example.ogg differ diff --git a/tests/normal/Example.png b/tests/normal/Example.png new file mode 100644 index 0000000..aa61359 Binary files /dev/null and b/tests/normal/Example.png differ diff --git a/tests/normal/Example.svg b/tests/normal/Example.svg new file mode 100644 index
0000000..438734c --- /dev/null +++ b/tests/normal/Example.svg @@ -0,0 +1,69 @@ + + + + + + image/svg+xml + + + + + + + + + + + + diff --git a/tests/normal/pdf-sample.pdf b/tests/normal/pdf-sample.pdf new file mode 100644 index 0000000..75fcd69 Binary files /dev/null and b/tests/normal/pdf-sample.pdf differ diff --git a/tests/src_invalid/blah.txt b/tests/normal/plaintext.txt similarity index 100% rename from tests/src_invalid/blah.txt rename to tests/normal/plaintext.txt diff --git a/tests/normal/rar_archive.rar b/tests/normal/rar_archive.rar new file mode 100644 index 0000000..87e0b8f Binary files /dev/null and b/tests/normal/rar_archive.rar differ diff --git a/tests/normal/rich_text.rtf b/tests/normal/rich_text.rtf new file mode 100644 index 0000000..ede29b3 --- /dev/null +++ b/tests/normal/rich_text.rtf @@ -0,0 +1,68 @@ +{\rtf1\ansi\ansicpg1252\uc1\deff0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deflang1033\deflangfe1033{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;} +{\f2\fmodern\fcharset0\fprq1{\*\panose 02070309020205020404}Courier New;}{\f3\froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}{\f10\fnil\fcharset2\fprq2{\*\panose 05000000000000000000}Wingdings;} +{\f121\froman\fcharset238\fprq2 Times New Roman CE;}{\f122\froman\fcharset204\fprq2 Times New Roman Cyr;}{\f124\froman\fcharset161\fprq2 Times New Roman Greek;}{\f125\froman\fcharset162\fprq2 Times New Roman Tur;} +{\f126\froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f127\froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f128\froman\fcharset186\fprq2 Times New Roman Baltic;}{\f129\froman\fcharset163\fprq2 Times New Roman (Vietnamese);} +{\f131\fswiss\fcharset238\fprq2 Arial CE;}{\f132\fswiss\fcharset204\fprq2 Arial Cyr;}{\f134\fswiss\fcharset161\fprq2 Arial Greek;}{\f135\fswiss\fcharset162\fprq2 Arial Tur;}{\f136\fswiss\fcharset177\fprq2 Arial (Hebrew);} 
+{\f137\fswiss\fcharset178\fprq2 Arial (Arabic);}{\f138\fswiss\fcharset186\fprq2 Arial Baltic;}{\f139\fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f141\fmodern\fcharset238\fprq1 Courier New CE;}{\f142\fmodern\fcharset204\fprq1 Courier New Cyr;} +{\f144\fmodern\fcharset161\fprq1 Courier New Greek;}{\f145\fmodern\fcharset162\fprq1 Courier New Tur;}{\f146\fmodern\fcharset177\fprq1 Courier New (Hebrew);}{\f147\fmodern\fcharset178\fprq1 Courier New (Arabic);} +{\f148\fmodern\fcharset186\fprq1 Courier New Baltic;}{\f149\fmodern\fcharset163\fprq1 Courier New (Vietnamese);}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0; +\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\stylesheet{ +\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext0 Normal;}{\s1\ql \li0\ri0\sb240\sa60\keepn\widctlpar\aspalpha\aspnum\faauto\outlinelevel0\adjustright\rin0\lin0\itap0 +\b\f1\fs32\lang1033\langfe1033\kerning32\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext0 \styrsid2294299 heading 1;}{\*\cs10 \additive \ssemihidden Default Paragraph Font;}{\* +\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv +\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1024\langfe1024\cgrid\langnp1024\langfenp1024 \snext11 \ssemihidden Normal Table;}{\*\ts15\tsrowd\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 +\trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 
\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv +\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1024\langfe1024\cgrid\langnp1024\langfenp1024 \sbasedon11 \snext15 \styrsid2294299 Table Grid;}{ +\s16\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext16 \ssemihidden \styrsid1792631 footnote text;}{\*\cs17 \additive \super +\sbasedon10 \ssemihidden \styrsid1792631 footnote reference;}}{\*\latentstyles\lsdstimax156\lsdlockeddef0}{\*\listtable{\list\listtemplateid-767292450\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li720\jclisttab\tx720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext +\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li1440\jclisttab\tx1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698693 +\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0 \fi-360\li2160\jclisttab\tx2160\lin2160 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers +;}\f3\fbias0 \fi-360\li2880\jclisttab\tx2880\lin2880 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li3600 +\jclisttab\tx3600\lin3600 
}{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698693\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0 \fi-360\li4320\jclisttab\tx4320\lin4320 } +{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li5040\jclisttab\tx5040\lin5040 }{\listlevel\levelnfc23 +\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li5760\jclisttab\tx5760\lin5760 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0 +\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698693\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0 \fi-360\li6480\jclisttab\tx6480\lin6480 }{\listname ;}\listid687222349}}{\*\listoverridetable{\listoverride\listid687222349 +\listoverridecount0\ls1}}{\*\rsidtbl \rsid1792631\rsid2294299}{\*\generator Microsoft Word 11.0.6113;}{\info{\title This is a test RTF}{\author Nate}{\operator Nate}{\version2}}\widowctrl\ftnbj\aenddoc\noxlattoyen\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\hyphcaps0\formshade\horzdoc\dgmargin\dghspace180\dgvspace180\dghorigin1800\dgvorigin1440 +\dghshow1\dgvshow1\jexpand\viewkind1\viewscale80\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\allowfieldendsel\wrppunct +\asianbrkrule\rsidroot2294299\newtblstyruls\nogrowautofit \fet0{\*\ftnsep \pard\plain \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid1792631 \chftnsep +\par }}{\*\ftnsepc \pard\plain \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid1792631 \chftnsepc +\par 
}}{\*\aftnsep \pard\plain \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid1792631 \chftnsep +\par }}{\*\aftnsepc \pard\plain \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid1792631 \chftnsepc +\par }}\sectd \linex0\endnhere\sectlinegrid360\sectdefaultcl\sftnbj {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}} +{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang +{\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\pard\plain +\s1\ql \li0\ri0\sb240\sa60\keepn\widctlpar\aspalpha\aspnum\faauto\outlinelevel0\adjustright\rin0\lin0\itap0\pararsid2294299 \b\f1\fs32\lang1033\langfe1033\kerning32\cgrid\langnp1033\langfenp1033 {\insrsid2294299 This is a test RTF +\par }\pard\plain \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid2294299 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid2294299 Hi! I\rquote m a test file. This is some }{\b\insrsid2294299 bold}{ +\insrsid2294299 text, and some }{\i\insrsid2294299 italic}{\insrsid2294299 text, as well as some }{\ul\insrsid2294299 underline}{\insrsid2294299 text. And a bit of }{\v\insrsid2294299\charrsid2294299 hidden}{\insrsid2294299 text. 
So we\rquote +re going to end this paragraph here and go on to a nice little list: +\par +\par {\listtext\pard\plain\f3\insrsid2294299 \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \ql \fi-360\li720\ri0\widctlpar\jclisttab\tx720\aspalpha\aspnum\faauto\ls1\adjustright\rin0\lin720\itap0\pararsid2294299 {\insrsid2294299 Item 1 +\par {\listtext\pard\plain\f3\insrsid2294299 \loch\af3\dbch\af0\hich\f3 \'b7\tab}Item 2 +\par {\listtext\pard\plain\f3\insrsid2294299 \loch\af3\dbch\af0\hich\f3 \'b7\tab}Item 3 +\par {\listtext\pard\plain\f3\insrsid2294299 \loch\af3\dbch\af0\hich\f3 \'b7\tab}Item 4 +\par }\pard \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid2294299 {\insrsid2294299 +\par And now comes a fun table: +\par +\par }\trowd \irow0\irowband0\ts15\trgaph108\trleft-108\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 +\trftsWidth1\trftsWidthB3\trautofit1\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tbllkhdrrows\tbllklastrow\tbllkhdrcols\tbllklastcol \clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 +\cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx2844\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx5796\clvertalt\clbrdrt\brdrs\brdrw10 +\clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx8748\pard\plain \ql \li0\ri0\widctlpar\intbl\aspalpha\aspnum\faauto\adjustright\rin0\lin0\pararsid2294299\yts15 +\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid2294299 Cell 1\cell Cell 2 +\par More in cell 2\cell Cell 3\cell }\pard\plain \ql \li0\ri0\widctlpar\intbl\aspalpha\aspnum\faauto\adjustright\rin0\lin0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid2294299 \trowd 
\irow0\irowband0\ts15\trgaph108\trleft-108\trbrdrt +\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 +\trftsWidth1\trftsWidthB3\trautofit1\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tbllkhdrrows\tbllklastrow\tbllkhdrcols\tbllklastcol \clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 +\cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx2844\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx5796\clvertalt\clbrdrt\brdrs\brdrw10 +\clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx8748\row }\pard\plain \ql \li0\ri0\widctlpar\intbl\aspalpha\aspnum\faauto\adjustright\rin0\lin0\pararsid2294299\yts15 +\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid2294299 Next row\cell Next row \cell Next row\cell }\pard\plain \ql \li0\ri0\widctlpar\intbl\aspalpha\aspnum\faauto\adjustright\rin0\lin0 +\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid2294299 \trowd \irow1\irowband1\lastrow \ts15\trgaph108\trleft-108\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv +\brdrs\brdrw10 \trftsWidth1\trftsWidthB3\trautofit1\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tbllkhdrrows\tbllklastrow\tbllkhdrcols\tbllklastcol \clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr +\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx2844\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx5796\clvertalt\clbrdrt +\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 
\cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx8748\row }\pard \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid2294299 { +\insrsid2294299 +\par A page break: +\par \page And here we\rquote re on the next page.}{\insrsid1792631 }{\insrsid2294299 +\par }{\insrsid1792631 This para has a }{\cs17\super\insrsid1792631 \chftn {\footnote \pard\plain \s16\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\cs17\super\insrsid1792631 +\chftn }{\insrsid1792631 This is the actual content of the footnote.}}}{\insrsid1792631 footnote. +\par And here\rquote s yet another paragraph. }{\insrsid1792631\charrsid2294299 +\par }} diff --git a/tests/normal/sample_mpeg4.mp4 b/tests/normal/sample_mpeg4.mp4 new file mode 100644 index 0000000..a05d935 Binary files /dev/null and b/tests/normal/sample_mpeg4.mp4 differ diff --git a/tests/normal/zip_archive.zip b/tests/normal/zip_archive.zip new file mode 100644 index 0000000..a396625 Binary files /dev/null and b/tests/normal/zip_archive.zip differ diff --git a/tests/src_invalid/42.zip b/tests/src_invalid/42.zip deleted file mode 100644 index e768153..0000000 Binary files a/tests/src_invalid/42.zip and /dev/null differ diff --git a/tests/src_invalid/blah.tar.bz2 b/tests/src_invalid/blah.tar.bz2 deleted file mode 100644 index 4aeb2cd..0000000 Binary files a/tests/src_invalid/blah.tar.bz2 and /dev/null differ diff --git a/tests/src_invalid/blah.zip b/tests/src_invalid/blah.zip deleted file mode 100644 index 3e809f4..0000000 Binary files a/tests/src_invalid/blah.zip and /dev/null differ diff --git a/tests/src_invalid/foobar.dat b/tests/src_invalid/foobar.dat deleted file mode 120000 index f553ab5..0000000 --- a/tests/src_invalid/foobar.dat +++ /dev/null @@ -1 +0,0 @@ -geneve_1564.pdf \ No newline at end of file diff --git a/tests/src_invalid/geneve_1564.pdf b/tests/src_invalid/geneve_1564.pdf deleted file mode 100644 index 
9ae1d9c..0000000 Binary files a/tests/src_invalid/geneve_1564.pdf and /dev/null differ diff --git a/tests/src_invalid/geneve_1564_wrong_mime.conf b/tests/src_invalid/geneve_1564_wrong_mime.conf deleted file mode 100644 index 9ae1d9c..0000000 Binary files a/tests/src_invalid/geneve_1564_wrong_mime.conf and /dev/null differ diff --git a/tests/src_invalid/ntree.wrl b/tests/src_invalid/ntree.wrl deleted file mode 100644 index d8560b8..0000000 --- a/tests/src_invalid/ntree.wrl +++ /dev/null @@ -1,181 +0,0 @@ -#VRML V1.0 ascii - -DEF Ez3d_Scene Separator { - DEF Ez3d_Viewer Switch { - whichChild -3 - DEF Title Info { - string "" - } - DEF Viewer Info { - string "walk" - } - DEF BackgroundColor Info { - string "0.000000 0.000000 0.000000" - } - DEF Cameras Switch { - whichChild 0 - PerspectiveCamera { - position 1.12948 2.23403 9.88775 - orientation 0.192413 -0.894646 -0.403219 6.1811 - focalDistance 9.51932 - heightAngle 0.683577 - } - } - } - DEF Ez3d_Environment Switch { - whichChild -3 - } - DEF Ez3d_Objects Switch { - whichChild -3 - DEF Cube001 Separator { - Transform { - scaleFactor 1.76681 0.168973 1.76681 - } - Texture2 { - filename "brick.gif" - wrapS REPEAT - wrapT REPEAT - } - DEF Ez3d_Material Material { - ambientColor 0.0375 0.0375 0.0375 - diffuseColor 0.425 0.425 0.425 - specularColor 0.807547 0.807547 0.807547 - shininess 0.5 - transparency 0 - } - ShapeHints { - vertexOrdering COUNTERCLOCKWISE - shapeType UNKNOWN_SHAPE_TYPE - creaseAngle 0.523599 - } - DEF Ez3d_Cube001 Cube { - } - } - DEF Cylinder001 Separator { - Transform { - translation 0.0806677 1.11004 4.76837e-007 - scaleFactor 0.176417 0.977561 0.156127 - center -5.52615e-009 4.58545e-008 0 - } - Texture2 { - filename "oak.gif" - wrapS REPEAT - wrapT REPEAT - } - ShapeHints { - vertexOrdering COUNTERCLOCKWISE - shapeType UNKNOWN_SHAPE_TYPE - creaseAngle 0.523599 - } - DEF Ez3d_Cylinder001 Cylinder { - } - } - DEF Group002 Separator { - Transform { - translation 0.0497642 2.50354 -0.281726 - 
center 0.174201 0.111916 0.106615 - } - DEF Sphere001_2 Separator { - Transform { - translation -0.0497642 -0.0265024 -0.479769 - } - DEF Ez3d_Material Material { - ambientColor 0.0908158 0.776699 0.00823493 - diffuseColor 0.0935403 0.8 0.00848198 - specularColor 0.114655 0.980583 0.0103966 - shininess 0.184466 - transparency 0.485437 - } - ShapeHints { - vertexOrdering COUNTERCLOCKWISE - shapeType UNKNOWN_SHAPE_TYPE - creaseAngle 0.523599 - } - DEF Ez3d_Sphere001_2 Sphere { - } - } - DEF Sphere001 Separator { - Transform { - translation -0.0497642 -0.178993 0.25727 - } - DEF Ez3d_Material Material { - ambientColor 0.0908158 0.776699 0.00823493 - diffuseColor 0.0935403 0.8 0.00848198 - specularColor 0.114655 0.980583 0.0103966 - shininess 0.184466 - transparency 0.485437 - } - ShapeHints { - vertexOrdering COUNTERCLOCKWISE - shapeType UNKNOWN_SHAPE_TYPE - creaseAngle 0.523599 - } - DEF Ez3d_Sphere001 Sphere { - } - } - DEF Group001 Separator { - Transform { - translation 0.0995283 0.205495 0.222499 - center 0.0746732 -0.077734 -0.115884 - } - DEF Sphere001_4 Separator { - Transform { - translation 0.557986 0.19733 0.231768 - } - DEF Ez3d_Material Material { - ambientColor 0.0908158 0.776699 0.00823493 - diffuseColor 0.0935403 0.8 0.00848198 - specularColor 0.114655 0.980583 0.0103966 - shininess 0.184466 - transparency 0.485437 - } - ShapeHints { - vertexOrdering COUNTERCLOCKWISE - shapeType UNKNOWN_SHAPE_TYPE - creaseAngle 0.523599 - } - DEF Ez3d_Sphere001_4 Sphere { - } - } - DEF Sphere001_1 Separator { - Transform { - translation -0.149346 -0.352797 0.470501 - } - DEF Ez3d_Material Material { - ambientColor 0.0908158 0.776699 0.00823493 - diffuseColor 0.0935403 0.8 0.00848198 - specularColor 0.114655 0.980583 0.0103966 - shininess 0.184466 - transparency 0.485437 - } - ShapeHints { - vertexOrdering COUNTERCLOCKWISE - shapeType UNKNOWN_SHAPE_TYPE - creaseAngle 0.523599 - } - DEF Ez3d_Sphere001_1 Sphere { - } - } - DEF Sphere001_3 Separator { - Transform { - 
translation -0.40864 0.155468 -0.702269 - } - DEF Ez3d_Material Material { - ambientColor 0.0908158 0.776699 0.00823493 - diffuseColor 0.0935403 0.8 0.00848198 - specularColor 0.114655 0.980583 0.0103966 - shininess 0.184466 - transparency 0.485437 - } - ShapeHints { - vertexOrdering COUNTERCLOCKWISE - shapeType UNKNOWN_SHAPE_TYPE - creaseAngle 0.523599 - } - DEF Ez3d_Sphere001_3 Sphere { - } - } - } - } - } -} diff --git a/tests/src_invalid/test.obj b/tests/src_invalid/test.obj deleted file mode 100644 index 907b308..0000000 --- a/tests/src_invalid/test.obj +++ /dev/null @@ -1 +0,0 @@ -blah diff --git a/tests/src_valid/blah.conf b/tests/src_valid/blah.conf deleted file mode 100644 index 484ba93..0000000 --- a/tests/src_valid/blah.conf +++ /dev/null @@ -1 +0,0 @@ -This is a test. diff --git a/tests/src_valid/dir1/dir2/blah.conf b/tests/src_valid/dir1/dir2/blah.conf deleted file mode 100644 index 484ba93..0000000 --- a/tests/src_valid/dir1/dir2/blah.conf +++ /dev/null @@ -1 +0,0 @@ -This is a test. 
diff --git a/tests/src_valid/test.zip b/tests/src_valid/test.zip deleted file mode 100644 index 4b911c0..0000000 Binary files a/tests/src_valid/test.zip and /dev/null differ diff --git a/tests/test_filecheck.py b/tests/test_filecheck.py index e7aeaac..1282a0e 100644 --- a/tests/test_filecheck.py +++ b/tests/test_filecheck.py @@ -2,57 +2,130 @@ # -*- coding: utf-8 -*- import os -import shutil +import unittest.mock as mock import pytest +import yaml -from tests.logging import save_logs try: - from bin.filecheck import KittenGroomerFileCheck, File, main + from bin.filecheck import KittenGroomerFileCheck, File, GroomerLogger NODEPS = False except ImportError: NODEPS = True +pytestmark = pytest.mark.skipif(NODEPS, reason="Dependencies aren't installed") + fixture = pytest.fixture skip = pytest.mark.skip -skipif_nodeps = pytest.mark.skipif(NODEPS, - reason="Dependencies aren't installed") +parametrize = pytest.mark.parametrize -@skipif_nodeps -class TestSystem: - - @fixture - def valid_groomer(self): - src_path = os.path.join(os.getcwd(), 'tests/src_valid') - dst_path = self.make_dst_dir_path(src_path) - return KittenGroomerFileCheck(src_path, dst_path, debug=True) - - @fixture - def invalid_groomer(self): - src_path = os.path.join(os.getcwd(), 'tests/src_invalid') - dst_path = self.make_dst_dir_path(src_path) - return KittenGroomerFileCheck(src_path, dst_path, debug=True) - - def make_dst_dir_path(self, src_dir_path): - dst_path = src_dir_path + '_dst' - shutil.rmtree(dst_path, ignore_errors=True) - os.makedirs(dst_path, exist_ok=True) - return dst_path - - def test_filecheck_src_valid(self, valid_groomer): - valid_groomer.run() - test_description = "filecheck_valid" - save_logs(valid_groomer, test_description) - - def test_filecheck_src_invalid(self, invalid_groomer): - invalid_groomer.run() - test_description = "filecheck_invalid" - save_logs(invalid_groomer, test_description) +NORMAL_FILES_PATH = 'tests/normal/' +DANGEROUS_FILES_PATH = 'tests/dangerous/' 
+UNCATEGORIZED_FILES_PATH = 'tests/uncategorized' +CATALOG_PATH = 'tests/file_catalog.yaml' -class TestFileHandling: - def test_autorun(self): - # Run on a single autorun file, confirm that it gets flagged as dangerous - # TODO: build out these and other methods for individual file cases - pass +class SampleFile(): + def __init__(self, path, exp_dangerous): + self.path = path + self.filename = os.path.basename(path) + self.exp_dangerous = exp_dangerous + + +def gather_sample_files(): + file_catalog = read_file_catalog() + normal_catalog = file_catalog['normal'] + dangerous_catalog = file_catalog['dangerous'] + sample_files = create_sample_files( + normal_catalog, + NORMAL_FILES_PATH, + exp_dangerous=False + ) + sample_files.extend(create_sample_files( + dangerous_catalog, + DANGEROUS_FILES_PATH, + exp_dangerous=True + )) + return sample_files + + +def read_file_catalog(): + with open(os.path.abspath(CATALOG_PATH)) as catalog_file: + catalog_dict = yaml.safe_load(catalog_file) + return catalog_dict + + +def create_sample_files(file_catalog, dir_path, exp_dangerous): + sample_files = [] + dir_files = set_of_files(dir_path) + # Sorted to make the test cases occur in a consistent order, doesn't have to be + for filename, file_dict in sorted(file_catalog.items()): + full_path = os.path.abspath(os.path.join(dir_path, filename)) + try: + dir_files.remove(full_path) + newfile = SampleFile(full_path, exp_dangerous) + newfile.xfail = file_dict.get('xfail', False) + sample_files.append(newfile) + except KeyError: + raise FileNotFoundError("{} could not be found".format(filename)) + for file_path in dir_files: + newfile = SampleFile(file_path, exp_dangerous) + newfile.xfail = False + sample_files.append(newfile) + return sample_files + + +def set_of_files(dir_path): + """Set of all full file paths in `dir_path`.""" + full_dir_path = os.path.abspath(dir_path) + file_paths = set() + for path in os.listdir(full_dir_path): + full_path = os.path.join(full_dir_path, path) + if 
os.path.isfile(full_path): + file_paths.add(full_path) + return file_paths + + +def get_filename(sample_file): + return os.path.basename(sample_file.path) + + +@fixture(scope='module') +def src_dir_path(tmpdir_factory): + return tmpdir_factory.mktemp('src').strpath + +@fixture(scope='module') +def dest_dir_path(tmpdir_factory): + return tmpdir_factory.mktemp('dest').strpath + + +@fixture +def groomer(dest_dir_path): + dummy_src_path = os.getcwd() + return KittenGroomerFileCheck(dummy_src_path, dest_dir_path, debug=True) + + +@fixture +def mock_logger(dest_dir_path): + return mock.MagicMock(spec=GroomerLogger) + + +@parametrize( + argnames="sample_file", + argvalues=gather_sample_files(), + ids=get_filename) +def test_sample_files(mock_logger, sample_file, groomer, dest_dir_path): + if sample_file.xfail: + pytest.xfail(reason='Marked xfail in file catalog') + file_dest_path = os.path.join(dest_dir_path, sample_file.filename) + file = File(sample_file.path, file_dest_path, mock_logger) + groomer.process_file(file) + assert file.is_dangerous == sample_file.exp_dangerous + + +def test_uncategorized(tmpdir): + src_path = os.path.abspath(UNCATEGORIZED_FILES_PATH) + dst_path = tmpdir.strpath + groomer = KittenGroomerFileCheck(src_path, dst_path, debug=True) + groomer.run() diff --git a/tests/logging.py b/tests/test_filecheck_logging.py similarity index 94% rename from tests/logging.py rename to tests/test_filecheck_logging.py index e705231..ddb9066 100644 --- a/tests/logging.py +++ b/tests/test_filecheck_logging.py @@ -1,5 +1,10 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + import os -from datetime import datetime +import datetime + +import pytest def save_logs(groomer, test_description): diff --git a/tests/test_kittengroomer.py b/tests/test_kittengroomer.py index 538314d..1856022 100644 --- a/tests/test_kittengroomer.py +++ b/tests/test_kittengroomer.py @@ -2,249 +2,320 @@ # -*- coding: utf-8 -*- import os +import unittest.mock as mock import pytest from 
kittengroomer import FileBase, KittenGroomerBase +from kittengroomer.helpers import ImplementationRequired skip = pytest.mark.skip xfail = pytest.mark.xfail fixture = pytest.fixture -# FileBase - class TestFileBase: - @fixture - def source_file(self): - return 'tests/src_valid/blah.conf' + @fixture(scope='class') + def src_dir_path(self, tmpdir_factory): + return tmpdir_factory.mktemp('src').strpath + + @fixture(scope='class') + def dest_dir_path(self, tmpdir_factory): + return tmpdir_factory.mktemp('dest').strpath @fixture - def dest_file(self): - return 'tests/dst/blah.conf' - - @fixture - def generic_conf_file(self, source_file, dest_file): - return FileBase(source_file, dest_file) - - @fixture - def symlink_file(self, tmpdir): + def tmpfile_path(self, tmpdir): file_path = tmpdir.join('test.txt') file_path.write('testing') - file_path = file_path.strpath - symlink_path = tmpdir.join('symlinked.txt') + return file_path.strpath + + @fixture + def symlink_file_path(self, tmpdir, tmpfile_path): + symlink_path = tmpdir.join('symlinked') symlink_path = symlink_path.strpath - os.symlink(file_path, symlink_path) - return FileBase(symlink_path, symlink_path) + os.symlink(tmpfile_path, symlink_path) + return symlink_path @fixture - def temp_file(self, tmpdir): - file_path = tmpdir.join('test.txt') - file_path.write('testing') - file_path = file_path.strpath - return FileBase(file_path, file_path) + def text_file(self): + with mock.patch( + 'kittengroomer.helpers.magic.from_file', + return_value='text/plain' + ): + src_path = 'src/test.txt' + dst_path = 'dst/test.txt' + file = FileBase(src_path, dst_path) + return file - @fixture - def temp_file_no_ext(self, tmpdir): - file_path = tmpdir.join('test') - file_path.write('testing') - file_path = file_path.strpath - return FileBase(file_path, file_path) + # Constructor behavior - @fixture - def file_marked_dangerous(self, generic_conf_file): - generic_conf_file.make_dangerous() - return generic_conf_file + 
@mock.patch('kittengroomer.helpers.magic') + def test_init_identify_filename(self, mock_libmagic): + """Init should identify the filename correctly for src_path.""" + src_path = 'src/test.txt' + dst_path = 'dst/test.txt' + file = FileBase(src_path, dst_path) + assert file.filename == 'test.txt' - @fixture - def file_marked_unknown(self, generic_conf_file): - generic_conf_file.make_unknown() - return generic_conf_file - - @fixture - def file_marked_binary(self, generic_conf_file): - generic_conf_file.make_binary() - return generic_conf_file - - @fixture(params=[ - FileBase.make_dangerous, - FileBase.make_unknown, - FileBase.make_binary - ]) - def file_marked_all_parameterized(self, request, generic_conf_file): - request.param(generic_conf_file) - return generic_conf_file - - # What are the various things that can go wrong with file paths? We should have fixtures for them - # What should FileBase do if it's given a path that isn't a file (doesn't exist or is a dir)? Currently magic throws an exception - # We should probably catch everytime that happens and tell the user explicitly happened (and maybe put it in the log) - - def test_create_broken(self, tmpdir): - with pytest.raises(TypeError): - FileBase() - with pytest.raises(FileNotFoundError): - FileBase('', '') - with pytest.raises(IsADirectoryError): - FileBase(tmpdir.strpath, tmpdir.strpath) - # TODO: are there other cases here? path to a file that doesn't exist? permissions? 
- - def test_init(self, generic_conf_file): - generic_conf_file - - def test_extension_uppercase(self, tmpdir): - file_path = tmpdir.join('TEST.TXT') - file_path.write('testing') - file_path = file_path.strpath - file = FileBase(file_path, file_path) + @mock.patch('kittengroomer.helpers.magic') + def test_init_identify_extension(self, mock_libmagic): + """Init should identify the extension for src_path.""" + src_path = 'src/test.txt' + dst_path = 'dst/test.txt' + file = FileBase(src_path, dst_path) assert file.extension == '.txt' - def test_mimetypes(self, generic_conf_file): - assert generic_conf_file.mimetype == 'text/plain' - assert generic_conf_file.main_type == 'text' - assert generic_conf_file.sub_type == 'plain' - assert generic_conf_file.has_mimetype - # Need to test something without a mimetype - # Need to test something that's a directory - # Need to test something that causes the unicode exception + @mock.patch('kittengroomer.helpers.magic') + def test_init_uppercase_extension(self, mock_libmagic): + """Init should coerce uppercase extension to lowercase""" + src_path = 'src/TEST.TXT' + dst_path = 'dst/TEST.TXT' + file = FileBase(src_path, dst_path) + assert file.extension == '.txt' - def test_has_mimetype_no_main_type(self, generic_conf_file): - generic_conf_file.main_type = '' - assert generic_conf_file.has_mimetype is False + @mock.patch('kittengroomer.helpers.magic') + def test_has_extension_true(self, mock_libmagic): + """If the file has an extension, has_extension should == True.""" + src_path = 'src/test.txt' + dst_path = 'dst/test.txt' + file = FileBase(src_path, dst_path) + assert file.has_extension is True - def test_has_mimetype_no_sub_type(self, generic_conf_file): - generic_conf_file.sub_type = '' - assert generic_conf_file.has_mimetype is False + @mock.patch('kittengroomer.helpers.magic') + def test_has_extension_false(self, mock_libmagic): + """If the file has no extension, has_extensions should == False.""" + src_path = 'src/test' + 
dst_path = 'dst/test' + file = FileBase(src_path, dst_path) + assert file.has_extension is False - def test_has_extension(self, temp_file, temp_file_no_ext): - assert temp_file.has_extension is True - print(temp_file_no_ext.extension) - assert temp_file_no_ext.has_extension is False + def test_init_file_doesnt_exist(self): + """Init should raise an exception if the file doesn't exist.""" + with pytest.raises(FileNotFoundError): + FileBase('', '') - def test_set_property(self, generic_conf_file): - generic_conf_file.set_property('test', True) - assert generic_conf_file.get_property('test') is True - assert generic_conf_file.get_property('wrong') is None + def test_init_srcpath_is_directory(self, tmpdir): + """Init should raise an exception if given a path to a directory.""" + with pytest.raises(IsADirectoryError): + FileBase(tmpdir.strpath, tmpdir.strpath) - def test_marked_dangerous(self, file_marked_all_parameterized): - file_marked_all_parameterized.make_dangerous() - assert file_marked_all_parameterized.is_dangerous is True - # Should work regardless of weird paths?? 
- # Should check file path alteration behavior as well + @mock.patch('kittengroomer.helpers.magic') + def test_init_symlink(self, mock_libmagic, symlink_file_path): + """Init should properly identify symlinks.""" + file = FileBase(symlink_file_path, '') + assert file.mimetype == 'inode/symlink' - def test_generic_dangerous(self, generic_conf_file): - assert generic_conf_file.is_dangerous is False - generic_conf_file.make_dangerous() - assert generic_conf_file.is_dangerous is True + @mock.patch('kittengroomer.helpers.magic') + def test_is_symlink_attribute(self, mock_libmagic, symlink_file_path): + """If a file is a symlink, is_symlink should return True.""" + file = FileBase(symlink_file_path, '') + assert file.is_symlink is True - def test_has_symlink(self, tmpdir): - file_path = tmpdir.join('test.txt') - file_path.write('testing') - file_path = file_path.strpath - symlink_path = tmpdir.join('symlinked.txt') - symlink_path = symlink_path.strpath - os.symlink(file_path, symlink_path) - file = FileBase(file_path, file_path) - symlink = FileBase(symlink_path, symlink_path) - assert file.is_symlink is False - assert symlink.is_symlink is True + def test_init_mimetype_attribute_assigned_correctly(self): + """When libmagic returns a given mimetype, the mimetype should be + assigned properly.""" + with mock.patch('kittengroomer.helpers.magic.from_file', + return_value='text/plain'): + file = FileBase('', '') + assert file.mimetype == 'text/plain' - def test_has_symlink_fixture(self, symlink_file): - assert symlink_file.is_symlink is True + def test_maintype_and_subtype_attributes(self): + """If a file has a full mimetype, maintype and subtype should == + the appropriate values.""" + with mock.patch('kittengroomer.helpers.magic.from_file', + return_value='text/plain'): + file = FileBase('', '') + assert file.maintype == 'text' + assert file.subtype == 'plain' - def test_generic_make_unknown(self, generic_conf_file): - assert generic_conf_file.is_unknown is False - 
generic_conf_file.make_unknown() - assert generic_conf_file.is_unknown - # given a FileBase object with no marking, should do the right things + def test_has_mimetype_no_full_type(self): + """If a file doesn't have a full mimetype has_mimetype should == False.""" + with mock.patch('kittengroomer.helpers.magic.from_file', + return_value='data'): + file = FileBase('', '') + assert file.has_mimetype is False - def test_marked_make_unknown(self, file_marked_all_parameterized): - file = file_marked_all_parameterized - if file.is_unknown: - file.make_unknown() - assert file.is_unknown - else: - assert file.is_unknown is False - file.make_unknown() - assert file.is_unknown is False - # given a FileBase object with an unrecognized marking, should ??? + def test_has_mimetype_mimetype_is_none(self): + """If a file's mimetype is None, has_mimetype should == False.""" + with mock.patch('kittengroomer.helpers.FileBase._determine_mimetype', + return_value=None): + file = FileBase('', '') + assert file.has_mimetype is False - def test_generic_make_binary(self, generic_conf_file): - assert generic_conf_file.is_binary is False - generic_conf_file.make_binary() - assert generic_conf_file.is_binary + # File properties - def test_marked_make_binary(self, file_marked_all_parameterized): - file = file_marked_all_parameterized - if file.is_dangerous: - file.make_binary() - assert file.is_binary is False - else: - file.make_binary() - assert file.is_binary + def test_get_property_doesnt_exist(self, text_file): + """Trying to get a property that doesn't exist should return None.""" + assert text_file.get_property('thing') is None - def test_force_ext_change(self, generic_conf_file): - assert generic_conf_file.has_extension - assert generic_conf_file.get_property('extension') == '.conf' - assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf' - generic_conf_file.force_ext('.txt') - assert os.path.splitext(generic_conf_file.dst_path)[1] == '.txt' - assert 
generic_conf_file.get_property('extension') == '.txt' - # should be able to handle weird paths + def test_get_property_builtin(self, text_file): + """Getting a property that's been set should return that property.""" + assert text_file.get_property('is_dangerous') is False - def test_force_ext_correct(self, generic_conf_file): - assert generic_conf_file.has_extension - assert generic_conf_file.get_property('extension') == '.conf' - generic_conf_file.force_ext('.conf') - assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf' - assert generic_conf_file.get_property('force_ext') is None - # shouldn't change a file's extension if it already is right + def test_get_property_user_defined(self, text_file): + """Getting a user defined property should return that property.""" + text_file._user_defined = {'thing': True} + assert text_file.get_property('thing') is True - def test_create_metadata_file(self, temp_file): - metadata_file_path = temp_file.create_metadata_file('.metadata.txt') - with open(metadata_file_path, 'w+') as metadata_file: - metadata_file.write('Have some metadata!') - # Shouldn't be able to make a metadata file with no extension - assert temp_file.create_metadata_file('') is False - # if metadata file already exists - # if there is no metadata to write should this work? 
+ def test_set_property_user_defined(self, text_file): + """Setting a non-default property should make it available for + get_property.""" + text_file.set_property('thing', True) + assert text_file.get_property('thing') is True - def test_safe_copy(self, generic_conf_file): - generic_conf_file.safe_copy() - # check that safe copy can handle weird file path inputs + def test_set_property_builtin(self, text_file): + """Setting a builtin property should assign that property.""" + text_file.set_property('is_dangerous', True) + assert text_file.get_property('is_dangerous') is True + + def test_add_new_description(self, text_file): + """Adding a new description should add it to the list of description strings.""" + text_file.add_description('thing') + assert text_file.get_property('description_string') == ['thing'] + + def test_add_description_exists(self, text_file): + """Adding a description that already exists shouldn't duplicate it.""" + text_file.add_description('thing') + text_file.add_description('thing') + assert text_file.get_property('description_string') == ['thing'] + + def test_add_description_not_string(self, text_file): + """Adding a description that isn't a string should raise an error.""" + with pytest.raises(TypeError): + text_file.add_description(123) + + def test_add_new_error(self, text_file): + """Adding a new error should add it to the dict of errors.""" + text_file.add_error(Exception, 'thing') + assert text_file.get_property('_errors') == {Exception: 'thing'} + + def test_normal_file_mark_dangerous(self, text_file): + """Marking a file dangerous should identify it as dangerous.""" + text_file.make_dangerous() + assert text_file.is_dangerous is True + + def test_normal_file_mark_dangerous_filename_change(self, text_file): + """Marking a file dangerous should mangle the filename.""" + filename = text_file.filename + text_file.make_dangerous() + assert text_file.filename == 'DANGEROUS_{}_DANGEROUS'.format(filename) + + def 
test_normal_file_mark_dangerous_add_description(self, text_file): + """Marking a file as dangerous and passing in a description should add + that description to the file.""" + text_file.make_dangerous('thing') + assert text_file.get_property('description_string') == ['thing'] + + def test_dangerous_file_mark_dangerous(self, text_file): + """Marking a dangerous file as dangerous should do nothing, and the + file should remain dangerous.""" + text_file.make_dangerous() + text_file.make_dangerous() + assert text_file.is_dangerous is True + + def test_force_ext_change_filepath(self, text_file): + """Force_ext should modify the path of the file to end in the + new extension.""" + text_file.force_ext('.test') + assert text_file.dst_path.endswith('.test') + + def test_force_ext_add_dot(self, text_file): + """Force_ext should add a dot to an extension given without one.""" + text_file.force_ext('test') + assert text_file.dst_path.endswith('.test') + + def test_force_ext_change_extension_attr(self, text_file): + """Force_ext should modify the extension attribute""" + text_file.force_ext('.thing') + assert text_file.extension == '.thing' + + def test_force_ext_no_change(self, text_file): + """Force_ext should do nothing if the current extension is the same + as the new extension.""" + text_file.force_ext('.txt') + assert text_file.extension == '.txt' + assert '.txt.txt' not in text_file.dst_path + + def test_safe_copy_calls_copy(self, src_dir_path, dest_dir_path): + """Calling safe_copy should copy the file from the correct path to + the correct destination path.""" + file_path = os.path.join(src_dir_path, 'test.txt') + with open(file_path, 'w+') as file: + file.write('') + dst_path = os.path.join(dest_dir_path, 'test.txt') + with mock.patch('kittengroomer.helpers.magic.from_file', + return_value='text/plain'): + file = FileBase(file_path, dst_path) + with mock.patch('kittengroomer.helpers.shutil.copy') as mock_copy: + file.safe_copy() + 
mock_copy.assert_called_once_with(file_path, dst_path) + + def test_safe_copy_makedir_doesnt_exist(self): + """Calling safe_copy should create intermediate directories in the path + if they don't exist.""" + pass + + def test_safe_copy_makedir_exists(self): + """Calling safe_copy when some intermediate directories exist should + result in the creation of the full path and the file.""" + pass + + def test_create_metadata_file_new(self): + pass + + def test_create_metadata_file_already_exists(self): + pass -class TestLogger: +class TestLogging: - pass + def test_computehash(self): + """Computehash should return the correct sha256 hash of a given file.""" + pass class TestKittenGroomerBase: - @fixture - def source_directory(self): - return 'tests/src_invalid' + @fixture(scope='class') + def src_dir_path(self, tmpdir_factory): + return tmpdir_factory.mktemp('src').strpath + + @fixture(scope='class') + def dest_dir_path(self, tmpdir_factory): + return tmpdir_factory.mktemp('dest').strpath @fixture - def dest_directory(self): - return 'tests/dst' + def groomer(self, src_dir_path, dest_dir_path): + return KittenGroomerBase(src_dir_path, dest_dir_path) - @fixture - def generic_groomer(self, source_directory, dest_directory): - return KittenGroomerBase(source_directory, dest_directory) - - def test_create(self, generic_groomer): - assert generic_groomer - - def test_instantiation(self, source_directory, dest_directory): - KittenGroomerBase(source_directory, dest_directory) - - def test_list_all_files(self, tmpdir): + def test_list_all_files_includes_file(self, tmpdir, groomer): + """Calling list_all_files should include files in the given path.""" file = tmpdir.join('test.txt') file.write('testing') + files = groomer.list_all_files(tmpdir.strpath) + assert file.strpath in files + + def test_list_all_files_excludes_dir(self, tmpdir, groomer): + """Calling list_all_files shouldn't include directories in the given + path.""" testdir = tmpdir.join('testdir') 
os.mkdir(testdir.strpath) - simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath) - files = simple_groomer.list_all_files(simple_groomer.src_root_path) - assert file.strpath in files + files = groomer.list_all_files(tmpdir.strpath) assert testdir.strpath not in files + + def test_safe_remove(self, groomer, src_dir_path): + """Calling safe_remove should not raise an Exception if trying to + remove a file that doesn't exist.""" + groomer.safe_remove(os.path.join(src_dir_path, 'thing')) + + def test_safe_mkdir_file_exists(self, groomer, dest_dir_path): + """Calling safe_mkdir should not overwrite an existing directory.""" + filepath = os.path.join(dest_dir_path, 'thing') + os.mkdir(filepath) + groomer.safe_mkdir(filepath) + + def test_processdir_not_implemented(self, groomer): + """Calling processdir should raise an Implementation Required error.""" + with pytest.raises(ImplementationRequired): + groomer.processdir('.', '.') diff --git a/tests/testfile_catalog.md b/tests/testfile_catalog.md deleted file mode 100644 index 692daf8..0000000 --- a/tests/testfile_catalog.md +++ /dev/null @@ -1,12 +0,0 @@ -src_invalid -=========== - -- - - - -src_valid -========= - -- Example.jpg: image/jpeg, obtained from wikipedia.org -- blah.conf: text file with a .conf extension \ No newline at end of file diff --git a/tests/test_logs/.keepdir b/tests/uncategorized/.keepdir similarity index 100% rename from tests/test_logs/.keepdir rename to tests/uncategorized/.keepdir