Merge pull request #16 from dputtick/dev

Bug fixes, test file catalog, refactoring
pull/25/head
Raphaël Vinot 2017-07-18 14:03:22 +02:00 committed by GitHub
commit 338bd5a018
38 changed files with 840 additions and 669 deletions

4
.gitignore vendored
View File

@ -71,8 +71,8 @@ tests/dst/*
tests/*_dst
tests/test_logs/*
!tests/**/.keepdir
!tests/src_invalid/*
!tests/src_valid/*
!tests/dangerous/*
!tests/normal/*
pdfid.py
# Plugins are pdfid stuff
plugin_*

View File

@ -9,7 +9,7 @@ python:
- nightly
sudo: required
# do we need sudo? should double check
# https://docs.travis-ci.com/user/ci-environment/#Virtualization-environments
dist: trusty
@ -36,7 +36,7 @@ install:
- pip install git+https://github.com/grierforensics/officedissector.git
# PyCIRCLean dependencies
- pip install -r dev-requirements.txt
- pip install coveralls codecov
- pip install codecov
# Testing dependencies
- sudo apt-get install rar
# Prepare tests
@ -45,7 +45,7 @@ install:
- pushd theZoo/malwares/Binaries
- python unpackall.py
- popd
- mv theZoo/malwares/Binaries/out tests/src_invalid/
- mv theZoo/malwares/Binaries/out tests/uncategorized/
# Path traversal attacks
- git clone https://github.com/jwilk/path-traversal-samples
- pushd path-traversal-samples
@ -56,25 +56,23 @@ install:
- make
- popd
- popd
- mv path-traversal-samples/zip/*.zip tests/src_invalid/
- mv path-traversal-samples/rar/*.rar tests/src_invalid/
- mv path-traversal-samples/zip/*.zip tests/uncategorized/
- mv path-traversal-samples/rar/*.rar tests/uncategorized/
# Office docs
- git clone https://github.com/eea/odfpy.git
- mv odfpy/tests/examples/* tests/src_invalid/
- pushd tests/src_invalid/
- mv odfpy/tests/examples/* tests/uncategorized/
- pushd tests/uncategorized/
- wget https://bitbucket.org/decalage/olefileio_pl/raw/3073963b640935134ed0da34906fea8e506460be/Tests/images/test-ole-file.doc
- wget --no-check-certificate https://www.officedissector.com/corpus/fraunhoferlibrary.zip
- unzip -o fraunhoferlibrary.zip
- rm fraunhoferlibrary.zip
- 7z x -p42 42.zip
# Some random samples
# - wget http://www.sample-videos.com/audio/mp3/india-national-anthem.mp3
# - wget http://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4
- wget http://thewalter.net/stef/software/rtfx/sample.rtf
- popd
# - pushd tests/dangerous/
# - 7z x -p42 42.zip
# - popd
script:
- travis_wait 30 py.test --cov=kittengroomer --cov=bin tests/
- travis_wait py.test --cov=kittengroomer --cov=bin tests/
notifications:
email:
@ -82,5 +80,4 @@ notifications:
on_failure: change
after_success:
- coveralls
- codecov

View File

@ -14,8 +14,6 @@ import officedissector
import warnings
import exifread
from PIL import Image
# TODO: why do we have this import? How does filecheck handle pngs?
# from PIL import PngImagePlugin
from pdfid import PDFiD, cPDFiD
from kittengroomer import FileBase, KittenGroomerBase, Logging
@ -119,14 +117,20 @@ class Config:
class File(FileBase):
"""
Main file object
Created for each file that is processed by KittenGroomer. Contains all
filetype-specific processing methods.
"""
def __init__(self, src_path, dst_path, logger):
super(File, self).__init__(src_path, dst_path)
self.is_recursive = False
self.is_archive = False
self.logger = logger
self.tempdir_path = self.dst_path + '_temp'
subtypes_apps = [
subtypes_apps = (
(Config.mimes_office, self._winoffice),
(Config.mimes_ooxml, self._ooxml),
(Config.mimes_rtf, self.text),
@ -136,13 +140,13 @@ class File(FileBase):
(Config.mimes_ms, self._executables),
(Config.mimes_compressed, self._archive),
(Config.mimes_data, self._binary_app),
]
)
self.app_subtype_methods = self._make_method_dict(subtypes_apps)
types_metadata = [
types_metadata = (
(Config.mimes_exif, self._metadata_exif),
(Config.mimes_png, self._metadata_png),
]
)
self.metadata_mimetype_methods = self._make_method_dict(types_metadata)
self.mime_processing_options = {
@ -204,19 +208,34 @@ class File(FileBase):
self.make_dangerous('Extension does not match expected extensions for this mimetype')
def _check_filename(self):
    """
    Verify the filename

    If the filename contains any dangerous or specific characters, handle
    them appropriately:

    - Known MacOS hidden metadata files get a description and are excluded
      from copying (`should_copy` is set to False).
    - A filename containing the unicode right-to-left override character is
      marked dangerous and the character is stripped from the filename.
    """
    if self.filename.startswith('.'):
        # This has to be a set literal: set() accepts a single iterable, so
        # passing the names as separate positional arguments raises TypeError.
        macos_hidden_files = {
            '.Trashes', '._.Trashes', '.DS_Store', '.fseventsd', '.Spotlight-V100',
        }
        if self.filename in macos_hidden_files:
            self.add_description('MacOS hidden metadata file.')
            self.should_copy = False
    right_to_left_override = u"\u202E"
    if right_to_left_override in self.filename:
        self.make_dangerous('Filename contains dangerous character')
        # Read the filename again after make_dangerous() so that any renaming
        # it performed is preserved when the override character is removed.
        new_filename = self.filename.replace(right_to_left_override, '')
        self.set_property('filename', new_filename)
def check(self):
if self.main_type in Config.ignored_mimes:
"""
Main file processing method
Delegates to various helper methods including filetype-specific checks.
"""
if self.maintype in Config.ignored_mimes:
self.should_copy = False
self.mime_processing_options.get(self.main_type, self.unknown)()
self.mime_processing_options.get(self.maintype, self.unknown)()
else:
self._check_dangerous()
self._check_filename()
@ -225,13 +244,14 @@ class File(FileBase):
if self.has_mimetype:
self._check_mimetype()
if not self.is_dangerous:
self.mime_processing_options.get(self.main_type, self.unknown)()
self.mime_processing_options.get(self.maintype, self.unknown)()
def write_log(self):
"""Pass information about the file to self.logger"""
props = self.get_all_props()
if not self.is_recursive:
if not self.is_archive:
if os.path.exists(self.tempdir_path):
# Hack to make images appear at the correct tree depth in log
# FIXME: Hack to make images appear at the correct tree depth in log
self.logger.add_file(self.src_path, props, in_tempdir=True)
return
self.logger.add_file(self.src_path, props)
@ -293,13 +313,13 @@ class File(FileBase):
def text(self):
"""Process an rtf, ooxml, or plaintext file."""
for mt in Config.mimes_rtf:
if mt in self.sub_type:
if mt in self.subtype:
self.add_description('Rich Text (rtf) file')
# TODO: need a way to convert it to plain text
self.force_ext('.txt')
return
for mt in Config.mimes_ooxml:
if mt in self.sub_type:
if mt in self.subtype:
self.add_description('OOXML (openoffice) file')
self._ooxml()
return
@ -308,12 +328,11 @@ class File(FileBase):
def application(self):
"""Process an application specific file according to its subtype."""
for subtype, method in self.app_subtype_methods.items():
if subtype in self.sub_type:
# TODO: should we change the logic so we don't iterate through all of the subtype methods?
# TODO: should these methods return a value?
if self.subtype in self.app_subtype_methods:
method = self.app_subtype_methods[self.subtype]
method()
return
# TODO: should these application methods return a value?
else:
self._unknown_app()
def _executables(self):
@ -346,9 +365,7 @@ class File(FileBase):
self.make_dangerous('WinOffice file containing a macro')
for i in indicators:
if i.id == 'ObjectPool' and i.value:
# TODO: is having an ObjectPool suspicious?
# LOG: user defined property
self.add_description('WinOffice file containing an object pool')
self.make_dangerous('WinOffice file containing an object pool')
elif i.id == 'flash' and i.value:
self.make_dangerous('WinOffice file with embedded flash')
self.add_description('WinOffice file')
@ -372,7 +389,7 @@ class File(FileBase):
if len(doc.features.embedded_packages) > 0:
self.make_dangerous('Ooxml file with embedded packages')
if not self.is_dangerous:
self.add_description('OOXML file')
self.add_description('Ooxml file')
def _libreoffice(self):
"""Process a libreoffice file."""
@ -423,17 +440,15 @@ class File(FileBase):
# TODO: change this to something archive type specific instead of generic 'Archive'
self.add_description('Archive')
self.should_copy = False
self.is_recursive = True
self.is_archive = True
def _unknown_app(self):
"""Process an unknown file."""
self.add_description('Unknown application file')
self.make_unknown()
self.make_dangerous('Unknown application file')
def _binary_app(self):
"""Process an unknown binary file."""
self.add_description('Unknown binary file')
self.make_binary()
self.make_dangerous('Unknown binary file')
#######################
# Metadata extractors
@ -557,7 +572,7 @@ class GroomerLogger(object):
self.log_debug_out = os.devnull
def _make_log_dir(self, root_dir_path):
"""Make the directory in the dest dir that will hold the logs"""
"""Create the directory in the dest dir that will hold the logs"""
log_dir_path = os.path.join(root_dir_path, 'logs')
if os.path.exists(log_dir_path):
shutil.rmtree(log_dir_path)
@ -565,6 +580,7 @@ class GroomerLogger(object):
return log_dir_path
def _add_root_dir(self, root_path):
"""Add the root directory to the log"""
dirname = os.path.split(root_path)[1] + '/'
with open(self.log_path, mode='ab') as lf:
lf.write(bytes(dirname, 'utf-8'))
@ -572,40 +588,51 @@ class GroomerLogger(object):
def add_file(self, file_path, file_props, in_tempdir=False):
"""Add a file to the log. Takes a dict of file properties."""
# TODO: fix var names in this method
# TODO: handle symlinks better: symlink_string = '{}+-- {}\t- Symbolic link to {}\n'.format(padding, f, os.readlink(curpath))
props = file_props
depth = self._get_path_depth(file_path)
description_string = ', '.join(props['description_string'])
description_string = ', '.join(file_props['description_string'])
file_hash = Logging.computehash(file_path)[:6]
if props['safety_category'] is None:
descr_cat = "Normal"
if file_props['is_dangerous']:
description_category = "Dangerous"
else:
descr_cat = props['safety_category'].capitalize()
# TODO: make size adjust to MB/GB for large files
size = str(props['file_size']) + 'B'
file_template = "+- {name} ({sha_hash}): {size}, {mt}/{st}. {desc}: {desc_str}"
description_category = "Normal"
size_string = self._format_file_size(file_props['file_size'])
file_template = "+- {name} ({sha_hash}): {size}, type: {mt}/{st}. {desc}: {desc_str}"
file_string = file_template.format(
name=props['filename'],
name=file_props['filename'],
sha_hash=file_hash,
size=size,
mt=props['maintype'],
st=props['subtype'],
desc=descr_cat,
size=size_string,
mt=file_props['maintype'],
st=file_props['subtype'],
desc=description_category,
desc_str=description_string,
# errs='' # TODO: add errors in human readable form here
)
# TODO: finish adding Errors and check that they appear properly
# if file_props['errors']:
# error_string = ', '.join([str(key) for key in file_props['errors']])
# file_string.append(' Errors: ' + error_string)
if in_tempdir:
depth -= 1
self._write_line_to_log(file_string, depth)
def add_dir(self, dir_path):
    """Write an entry for the directory at `dir_path` to the log."""
    depth = self._get_path_depth(dir_path)
    # Only the last path component is logged, with a trailing slash to mark
    # it as a directory.
    _, last_component = os.path.split(dir_path)
    self._write_line_to_log('+- ' + last_component + '/', depth)
def _format_file_size(self, size):
    """
    Returns a string with the file size and appropriate unit

    `size` is a size in bytes. The value is scaled by 1024 until it is below
    1024 or the largest unit (GB) is reached, then truncated to an int and
    suffixed with the unit, e.g. 1536 -> '1KB'.
    """
    units = ('B', 'KB', 'MB', 'GB')
    file_size = size
    # Only divide when moving on to a larger unit. Dividing again after the
    # last unit would turn the value into terabytes while labelling it 'GB'.
    for unit in units[:-1]:
        if file_size < 1024:
            return str(int(file_size)) + unit
        file_size = file_size / 1024
    return str(int(file_size)) + units[-1]
def _get_path_depth(self, path):
"""Returns the relative path depth compared to root directory"""
if self._dst_root_path in path:
base_path = self._dst_root_path
elif self._src_root_path in path:
@ -615,6 +642,11 @@ class GroomerLogger(object):
return path_depth
def _write_line_to_log(self, line, indentation_depth):
"""
Write a line to the log
Pad the line according to the `indentation_depth`.
"""
padding = b' '
padding += b'| ' * indentation_depth
line_bytes = os.fsencode(line)
@ -630,18 +662,17 @@ class KittenGroomerFileCheck(KittenGroomerBase):
super(KittenGroomerFileCheck, self).__init__(root_src, root_dst)
self.recursive_archive_depth = 0
self.max_recursive_depth = max_recursive_depth
self.cur_file = None
self.logger = GroomerLogger(root_src, root_dst, debug)
def process_dir(self, src_dir, dst_dir):
"""Process a directory on the source key."""
for srcpath in self.list_files_dirs(src_dir):
if os.path.isdir(srcpath):
if not os.path.islink(srcpath) and os.path.isdir(srcpath):
self.logger.add_dir(srcpath)
else:
dstpath = os.path.join(dst_dir, os.path.basename(srcpath))
self.cur_file = File(srcpath, dstpath, self.logger)
self.process_file(self.cur_file)
cur_file = File(srcpath, dstpath, self.logger)
self.process_file(cur_file)
def process_file(self, file):
"""
@ -655,7 +686,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
file.safe_copy()
file.set_property('copied', True)
file.write_log()
if file.is_recursive:
if file.is_archive:
self.process_archive(file)
# TODO: Can probably handle cleaning up the tempdir better
if hasattr(file, 'tempdir_path'):
@ -673,8 +704,6 @@ class KittenGroomerFileCheck(KittenGroomerBase):
file.make_dangerous('Archive bomb')
else:
tempdir_path = file.make_tempdir()
# TODO: double check we are properly escaping file.src_path
# otherwise we are running unsanitized user input directly in the shell
command_str = '{} -p1 x "{}" -o"{}" -bd -aoa'
unpack_command = command_str.format(SEVENZ_PATH,
file.src_path, tempdir_path)
@ -695,12 +724,20 @@ class KittenGroomerFileCheck(KittenGroomerBase):
return True
def list_files_dirs(self, root_dir_path):
"""
Returns a list of all files and directories
Performs a depth-first traversal of the file tree.
"""
queue = []
for path in sorted(os.listdir(root_dir_path), key=lambda x: str.lower(x)):
full_path = os.path.join(root_dir_path, path)
if os.path.isdir(full_path):
# check for symlinks first to prevent getting trapped in infinite symlink recursion
if os.path.islink(full_path):
queue.append(full_path)
queue += self.list_files_dirs(full_path) # if path is a dir, recurse through its contents
elif os.path.isdir(full_path):
queue.append(full_path)
queue += self.list_files_dirs(full_path)
elif os.path.isfile(full_path):
queue.append(full_path)
return queue

View File

@ -1,3 +1,4 @@
python-magic
pytest
pytest-cov
PyYAML

View File

@ -16,19 +16,6 @@ import argparse
import magic
class KittenGroomerError(Exception):
"""Base KittenGroomer exception handler."""
def __init__(self, message):
super(KittenGroomerError, self).__init__(message)
self.message = message
class ImplementationRequired(KittenGroomerError):
"""Implementation required error."""
pass
class FileBase(object):
"""
Base object for individual files in the source directory.
@ -43,82 +30,44 @@ class FileBase(object):
Create various properties and determine the file's mimetype.
"""
self.src_path = src_path
self.dst_path = dst_path
self.filename = os.path.basename(self.src_path)
self._file_props = {
'filepath': self.src_path,
'filename': self.filename,
'file_size': self.size,
'maintype': None,
'subtype': None,
'extension': None,
'safety_category': None,
'symlink': False,
'copied': False,
'description_string': [], # array of descriptions to be joined
'errors': {},
'user_defined': {}
}
self.extension = self._determine_extension()
self.set_property('extension', self.extension)
self.mimetype = self._determine_mimetype()
self.dst_dir = os.path.dirname(dst_path)
self.filename = os.path.basename(src_path)
self.size = self._get_size(src_path)
self.is_dangerous = False
self.copied = False
self.symlink_path = None
self.description_string = [] # array of descriptions to be joined
self._errors = {}
self._user_defined = {}
self.should_copy = True
self.main_type = None
self.sub_type = None
if self.mimetype:
self.main_type, self.sub_type = self._split_subtypes(self.mimetype)
if self.main_type:
self.set_property('maintype', self.main_type)
if self.sub_type:
self.set_property('subtype', self.sub_type)
def _determine_extension(self):
_, ext = os.path.splitext(self.src_path)
ext = ext.lower()
if ext == '':
ext = None
return ext
def _determine_mimetype(self):
if os.path.islink(self.src_path):
# magic will throw an IOError on a broken symlink
mimetype = 'inode/symlink'
self.set_property('symlink', os.readlink(self.src_path))
else:
try:
mt = magic.from_file(self.src_path, mime=True)
# Note: libmagic will always return something, even if it's just 'data'
except UnicodeEncodeError as e:
# FIXME: The encoding of the file that triggers this is broken (possibly it's UTF-16 and Python expects utf8)
# Note: one of the Travis files will trigger this exception
self.add_error(e, '')
mt = None
try:
mimetype = mt.decode("utf-8")
except:
mimetype = mt
return mimetype
def _split_subtypes(self, mimetype):
if '/' in mimetype:
main_type, sub_type = mimetype.split('/')
else:
main_type, sub_type = None, None
return main_type, sub_type
self.mimetype = self._determine_mimetype(src_path)
@property
def size(self):
"""Filesize in bytes as an int, 0 if file does not exist."""
try:
size = os.path.getsize(self.src_path)
except FileNotFoundError:
size = 0
return size
def dst_path(self):
return os.path.join(self.dst_dir, self.filename)
@property
def extension(self):
    """Lowercased extension of `self.filename` (with its dot), or None if there is none."""
    suffix = os.path.splitext(self.filename)[1]
    return None if suffix == '' else suffix.lower()
@property
def maintype(self):
    """First element of `self._split_mimetype(self.mimetype)`."""
    return self._split_mimetype(self.mimetype)[0]
@property
def subtype(self):
    """Second element of `self._split_mimetype(self.mimetype)`."""
    return self._split_mimetype(self.mimetype)[1]
@property
def has_mimetype(self):
"""True if file has a main and sub mimetype, else False."""
if not self.main_type or not self.sub_type:
if not self.maintype or not self.subtype:
return False
else:
return True
@ -131,43 +80,41 @@ class FileBase(object):
else:
return True
@property
def is_dangerous(self):
"""True if file has been marked 'dangerous', else False."""
return self._file_props['safety_category'] is 'dangerous'
@property
def is_unknown(self):
"""True if file has been marked 'unknown', else False."""
return self._file_props['safety_category'] is 'unknown'
@property
def is_binary(self):
"""True if file has been marked 'binary', else False."""
return self._file_props['safety_category'] is 'binary'
@property
def is_symlink(self):
"""True if file is a symlink, else False."""
if self._file_props['symlink'] is False:
if self.symlink_path is None:
return False
else:
return True
@property
def description_string(self):
    """Return the stored collection of description strings."""
    return self.__description_string

@description_string.setter
def description_string(self, value):
    """
    Initialise or extend the stored descriptions.

    The first assignment stores `value` as-is. Every later assignment
    appends `value` to the stored collection instead of replacing it, and
    skips values that are already present.

    Raises TypeError if a later assignment is not a `str`.
    """
    # hasattr() goes through the getter above, so it is False only while the
    # backing attribute has not been assigned yet (the getter raises
    # AttributeError in that case).
    if hasattr(self, 'description_string'):
        if isinstance(value, str):
            if value not in self.__description_string:
                self.__description_string.append(value)
        else:
            raise TypeError("Description_string can only include strings")
    else:
        self.__description_string = value
def set_property(self, prop_string, value):
"""
Take a property and a value and add them to the file's property dict.
Take a property and a value and add them to the file's stored props.
If `prop_string` is part of the file property API, set it to `value`.
Otherwise, add `prop_string`: `value` to `user_defined` properties.
TODO: rewrite docstring
"""
if prop_string is 'description_string':
if value not in self._file_props['description_string']:
self._file_props['description_string'].append(value)
elif prop_string in self._file_props.keys():
self._file_props[prop_string] = value
if hasattr(self, prop_string):
setattr(self, prop_string, value)
else:
self._file_props['user_defined'][prop_string] = value
self._user_defined[prop_string] = value
def get_property(self, prop_string):
"""
@ -175,20 +122,34 @@ class FileBase(object):
Returns `None` if `prop_string` cannot be found on the file.
"""
if prop_string in self._file_props:
return self._file_props[prop_string]
elif prop_string in self._file_props['user_defined']:
return self._file_props['user_defined'][prop_string]
else:
return None
try:
return getattr(self, prop_string)
except AttributeError:
return self._user_defined.get(prop_string, None)
def get_all_props(self):
"""Return a dict containing all stored properties of this file."""
return self._file_props
# Maybe move this onto the logger? I think that makes more sense
props_dict = {
'filepath': self.src_path,
'filename': self.filename,
'file_size': self.size,
'mimetype': self.mimetype,
'maintype': self.maintype,
'subtype': self.subtype,
'extension': self.extension,
'is_dangerous': self.is_dangerous,
'symlink_path': self.symlink_path,
'copied': self.copied,
'description_string': self.description_string,
'errors': self._errors,
'user_defined': self._user_defined
}
return props_dict
def add_error(self, error, info_string):
"""Add an `error`: `info_string` pair to the file."""
self._file_props['errors'].update({error: info_string})
self._errors.update({error: info_string})
def add_description(self, description_string):
"""
@ -205,29 +166,11 @@ class FileBase(object):
Prepend and append DANGEROUS to the destination file name
to help prevent double-click of death.
"""
if self.is_dangerous:
self.set_property('description_string', reason_string)
return
self.set_property('safety_category', 'dangerous')
self.set_property('description_string', reason_string)
path, filename = os.path.split(self.dst_path)
self.dst_path = os.path.join(path, 'DANGEROUS_{}_DANGEROUS'.format(filename))
def make_unknown(self):
"""Mark file as an unknown type and prepend UNKNOWN to filename."""
if self.is_dangerous or self.is_binary:
return
self.set_property('safety_category', 'unknown')
path, filename = os.path.split(self.dst_path)
self.dst_path = os.path.join(path, 'UNKNOWN_{}'.format(filename))
def make_binary(self):
"""Mark file as a binary and append .bin to filename."""
if self.is_dangerous:
return
self.set_property('safety_category', 'binary')
path, filename = os.path.split(self.dst_path)
self.dst_path = os.path.join(path, '{}.bin'.format(filename))
if not self.is_dangerous:
self.set_property('is_dangerous', True)
self.filename = 'DANGEROUS_{}_DANGEROUS'.format(self.filename)
if reason_string:
self.add_description(reason_string)
def safe_copy(self, src=None, dst=None):
"""Copy file and create destination directories if needed."""
@ -236,52 +179,89 @@ class FileBase(object):
if dst is None:
dst = self.dst_path
try:
dst_path, filename = os.path.split(dst)
if not os.path.exists(dst_path):
os.makedirs(dst_path)
os.makedirs(self.dst_dir, exist_ok=True)
shutil.copy(src, dst)
except Exception as e:
except IOError as e:
# Probably means we can't write in the dest dir
self.add_error(e, '')
def force_ext(self, ext):
"""If dst_path does not end in ext, append .ext to it."""
ext = self._check_leading_dot(ext)
if not self.dst_path.endswith(ext):
# LOG: do we want to log that the extension was changed as below?
# self.set_property('force_ext', True)
self.dst_path += ext
if not self._file_props['extension'] == ext:
self.set_property('extension', ext)
def force_ext(self, extension):
"""If dst_path does not end in `extension`, append .ext to it."""
new_ext = self._check_leading_dot(extension)
if not self.filename.endswith(new_ext):
# TODO: log that the extension was changed
self.filename += new_ext
if not self.get_property('extension') == new_ext:
self.set_property('extension', new_ext)
def create_metadata_file(self, ext):
def create_metadata_file(self, extension):
# TODO: this method name is confusing
"""
Create a separate file to hold extracted metadata.
The string `ext` will be used as the extension for the metadata file.
The string `extension` will be used as the extension for the file.
"""
ext = self._check_leading_dot(ext)
ext = self._check_leading_dot(extension)
try:
# Prevent using the same path as another file from src_path
if os.path.exists(self.src_path + ext):
err_str = ("Could not create metadata file for \"" +
raise KittenGroomerError(
"Could not create metadata file for \"" +
self.filename +
"\": a file with that path already exists.")
raise KittenGroomerError(err_str)
"\": a file with that path exists.")
else:
dst_dir_path, filename = os.path.split(self.dst_path)
if not os.path.exists(dst_dir_path):
os.makedirs(dst_dir_path)
os.makedirs(self.dst_dir, exist_ok=True)
# TODO: shouldn't mutate state and also return something
self.metadata_file_path = self.dst_path + ext
return self.metadata_file_path
# TODO: can probably let this exception bubble up
except KittenGroomerError as e:
self.add_error(e, '')
return False
def _check_leading_dot(self, ext):
    """Return `ext` with a leading dot added if it is non-empty and lacks one."""
    # TODO: this method name is confusing
    needs_dot = len(ext) > 0 and not ext.startswith('.')
    return '.' + ext if needs_dot else ext
def _determine_mimetype(self, file_path):
    """
    Return the mimetype of the file at `file_path`.

    Symlinks are never handed to libmagic: they are reported as
    'inode/symlink' and the link target is stored with
    `set_property('symlink_path', ...)`. For every other path the value
    reported by `magic.from_file(..., mime=True)` is returned, decoded from
    utf-8 when it is a byte string. If libmagic raises UnicodeEncodeError the
    error is recorded with `add_error` and None is returned.
    """
    if os.path.islink(file_path):
        # libmagic will throw an IOError on a broken symlink
        mimetype = 'inode/symlink'
        self.set_property('symlink_path', os.readlink(file_path))
    else:
        try:
            mt = magic.from_file(file_path, mime=True)
            # libmagic will always return something, even if it's just 'data'
        except UnicodeEncodeError as e:
            # FIXME: The encoding of the file that triggers this is broken (possibly it's UTF-16 and Python expects utf8)
            # Note: one of the Travis files will trigger this exception
            self.add_error(e, '')
            mt = None
        # Only byte strings need decoding. Checking the type explicitly
        # replaces a bare `except:` that also swallowed KeyboardInterrupt
        # and SystemExit.
        if isinstance(mt, bytes):
            try:
                mimetype = mt.decode("utf-8")
            except UnicodeDecodeError:
                # Keep the raw value when it is not valid utf-8
                mimetype = mt
        else:
            mimetype = mt
    return mimetype
def _split_mimetype(self, mimetype):
    """
    Split a mimetype string into its main type and subtype.

    Returns a `(main_type, sub_type)` tuple, or `(None, None)` when
    `mimetype` is empty/None or does not contain a '/'.
    """
    if mimetype and '/' in mimetype:
        # Split on the first '/' only, so a value with more than one slash
        # cannot break the two-name unpacking with a ValueError.
        main_type, sub_type = mimetype.split('/', 1)
    else:
        main_type, sub_type = None, None
    return main_type, sub_type
def _get_size(self, file_path):
    """Return the size of `file_path` in bytes as an int, or 0 if the file does not exist."""
    try:
        return os.path.getsize(file_path)
    except FileNotFoundError:
        return 0
class Logging(object):
@ -331,13 +311,24 @@ class KittenGroomerBase(object):
#######################
# TODO: if we move main() we can get rid of this as well
def processdir(self, src_dir, dst_dir):
"""Implement this function to define file processing behavior."""
raise ImplementationRequired('Please implement processdir.')
# TODO: Should this get moved to filecheck? It isn't really API code and somebody can implement it themselves
class KittenGroomerError(Exception):
    """
    Base class for KittenGroomer exceptions.

    The message passed to the constructor is forwarded to `Exception` and is
    also kept on `self.message`.
    """

    def __init__(self, message):
        super(KittenGroomerError, self).__init__(message)
        self.message = message
class ImplementationRequired(KittenGroomerError):
"""Implementation required error."""
pass
def main(kg_implementation, description='Call a KittenGroomer implementation to process files present in the source directory and copy them to the destination directory.'):
parser = argparse.ArgumentParser(prog='KittenGroomer', description=description)
parser.add_argument('-s', '--source', type=str, help='Source directory')

Binary file not shown.

56
tests/file_catalog.yaml Normal file
View File

@ -0,0 +1,56 @@
#YAML
# Possible fields:
# description:
# mimetype:
# xfail:
normal:
Example.gif: # Added: 27-06-2017, source: https://en.wikipedia.org/wiki/File:Example.gif
mimetype: image/gif
Example.jpg: # Added: 27-06-2017, source: https://en.wikipedia.org/wiki/File:Example.jpg
mimetype: image/jpeg
Example.ogg: # Added: 27-06-2017, source: https://en.wikipedia.org/wiki/File:Example.ogg
description: Ogg vorbis sound file
mimetype: audio/ogg
xfail: True
Example.png: # Added: 27-06-2017, source: https://en.wikipedia.org/wiki/File:Example.png
mimetype: image/png
Example.svg: # Added: 27-06-2017, source: https://en.wikipedia.org/wiki/File:Example.svg
mimetype: image/svg+xml
xfail: True
pdf-sample.pdf: # Added: 27-06-2017, source: http://che.org.il/wp-content/uploads/2016/12/pdf-sample.pdf
mimetype: application/pdf
plaintext.txt: # Added: 27-06-2017, source: hand-generated
mimetype: text/plain
rar_archive.rar: # Added: 27-06-2017, Rar archive. Source: hand-generated
description: rar archive
mimetype: application/x-rar
xfail: True
rich_text.rtf: # Added 27-06-2017, source: hand-generated
mimetype: text/rtf
sample_mpeg4.mp4: # Added 28-06-2017, source: https://support.apple.com/en-us/HT201549
mimetype: video/mp4
zip_archive.zip: # Added 27-06-2017, source: hand-generated
mimetype: application/zip
dangerous:
# 42.zip: # Added 27-06-2017, source: http://www.unforgettable.dk/42.zip
# description: zip archivebomb, password is '42'
# mimetype: application/zip
# xfail: True
archivebomb.zip: # Added 16-07-2017, source: hand-generated
description: zip archivebomb with 3 levels
mimetype: application/zip
xfail: True
autorun.inf: # Added 27-06-2017, source: hand-generated
description: Microsoft autorun file
mimetype: text/plain
config_file.conf: # Added 27-06-2017, source: hand-generated
description: config file
mimetype: text/plain
message.msg: # Added 27-06-2017, source: ????
description: message file, used by Outlook etc
mimetype: message/rfc822
testRTLexe.txt: # Added 27-06-2017, source: hand-generated
description: text file with right-to-left character in filename
mimetype: text/plain

BIN
tests/normal/Example.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.9 KiB

View File

Before

Width:  |  Height:  |  Size: 27 KiB

After

Width:  |  Height:  |  Size: 27 KiB

BIN
tests/normal/Example.ogg Normal file

Binary file not shown.

BIN
tests/normal/Example.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

69
tests/normal/Example.svg Normal file
View File

@ -0,0 +1,69 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://web.resource.org/cc/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="600"
height="600"
id="svg2"
sodipodi:version="0.32"
inkscape:version="0.45.1"
sodipodi:docname="Example.svg"
inkscape:output_extension="org.inkscape.output.svg.inkscape"
sodipodi:docbase="/home/gmaxwell"
version="1.0">
<metadata
id="metadata9">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
</cc:Work>
</rdf:RDF>
</metadata>
<sodipodi:namedview
inkscape:window-height="620"
inkscape:window-width="814"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
guidetolerance="10.0"
gridtolerance="10.0"
objecttolerance="10.0"
borderopacity="1.0"
bordercolor="#666666"
pagecolor="#ffffff"
id="base"
width="600px"
height="600px"
inkscape:zoom="0.35974058"
inkscape:cx="50"
inkscape:cy="519.04966"
inkscape:window-x="483"
inkscape:window-y="101"
inkscape:current-layer="svg2" />
<defs
id="defs16" />
<g
id="g2161"
transform="matrix(6.3951354,0,0,6.3951354,-22.626246,-7.1082509)">
<path
nodetypes="ccccccccccccccccccccccccccccccccccccccc"
id="flowRoot1882"
d="M 36.009766,9.2505083 C 37.739295,9.4211273 38.305879,11.470697 38.052581,12.935049 C 37.346266,16.153899 36.316821,19.51466 35.445405,22.717701 C 36.091666,24.812224 31.712284,24.008877 33.219932,22.315459 C 34.817041,18.411202 36.011404,13.498336 36.009766,9.2505083 z M 36.009766,2.9926958 C 38.210311,1.2242088 40.996268,9.172757 33.911571,6.1534847 C 33.884619,5.7603019 36.096289,3.3869447 36.009766,2.9926958 z M 41.371094,15.871601 C 41.371094,13.66457 41.371094,11.457539 41.371094,9.250508 C 43.180621,9.4257387 43.963014,11.704559 43.286137,13.215517 C 42.859084,15.059792 42.939241,17.3996 44.601487,18.625335 C 46.710544,19.683477 49.38774,17.353112 48.803268,15.118437 C 48.93196,13.406538 48.236292,11.613848 48.968862,9.9690415 C 51.055097,9.6500357 51.500677,12.487155 50.544985,13.844747 C 50.070023,15.309708 50.857452,16.781898 50.672344,18.239432 C 50.279615,19.94056 48.418404,20.00023 47.0225,20.071868 C 45.478489,20.38194 43.516835,20.791368 42.361947,19.38874 C 41.522514,18.444089 41.211274,17.107671 41.371094,15.871601 z M 61.224609,9.5727739 C 60.41978,11.557552 58.100804,10.235616 56.62767,10.571551 C 53.836862,14.393611 60.920038,13.513667 61.8085,17.011648 C 61.85613,18.933747 60.028359,20.587389 58.129091,20.443312 C 56.904487,20.607229 54.609204,20.982393 54.417879,19.267622 C 55.280609,17.508269 57.336359,19.528803 58.633111,18.8463 C 60.403141,17.99081 59.402232,15.555325 57.728781,15.321475 C 56.550115,14.98135 55.091813,15.225439 54.254747,14.112764 C 53.017669,12.881167 53.392132,10.733148 54.736719,9.7413252 C 56.619172,8.3307396 59.170326,8.9535067 61.224609,9.5727739 z M 66.458984,6.1450396 C 65.368126,7.6333334 67.348936,9.9531574 68.987229,9.0948979 C 69.978133,11.042503 66.524641,10.777931 66.473495,12.430992 C 64.443605,16.101814 68.48273,18.623426 67.571657,20.417528 C 65.440858,20.26155 64.324307,17.844452 64.577433,15.919357 C 64.70847,14.408586 65.055107,12.79361 64.322961,11.373941 C 63.786422,9.5475192 64.150419,7.1452655 
65.954233,6.1552477 L 66.206043,6.1203323 L 66.458984,6.1450396 L 66.458984,6.1450396 z " />
<path
nodetypes="ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"
id="flowRoot1890"
d="M 10.867188,44.877953 C 6.2812618,42.124849 5.2205914,52.366268 10.409215,49.892431 C 12.389385,49.295568 14.988045,43.912658 10.867188,44.877953 z M 15.167969,43.987328 C 14.919826,46.33724 16.617756,52.554202 12.298734,50.536918 C 9.8041142,52.312916 6.0580855,52.958674 4.5023123,49.583386 C 2.6350454,45.257322 7.3033103,42.298712 11.046443,43.361059 C 15.247185,41.320786 9.4930286,38.338264 7.1068792,40.322138 C 3.4374421,40.01388 7.406407,37.201407 9.3495087,37.962912 C 12.44212,37.877788 15.556534,40.380131 15.171751,43.648912 L 15.169638,43.83797 L 15.167969,43.987328 z M 30.53125,43.553734 C 29.638794,45.911558 32.49467,50.463872 28.779999,51.070944 C 26.888088,47.702306 30.931621,41.190257 25.58365,40.046147 C 20.73931,40.312798 21.252194,45.910871 22.001439,49.154066 C 21.84253,51.828309 18.790577,51.39256 19.585585,48.673738 C 19.851829,45.693864 18.285332,39.630301 20.986983,38.702911 C 23.508461,40.80889 25.761847,35.731906 28.452459,38.686226 C 29.921454,39.793194 30.827618,41.709992 30.53125,43.553734 z M 38.807,49.770223 C 42.369034,50.768974 44.523344,46.328688 43.700521,43.358983 C 40.402775,35.546453 32.491199,44.344131 38.807,49.770223 z M 39.941406,38.034203 C 52.085872,39.705642 43.204854,59.098342 34.688722,48.642968 C 32.591886,44.778031 34.383109,38.440132 39.291369,38.051827 L 39.941406,38.034203 L 39.941406,38.034203 z M 51.660156,34.624046 C 49.815978,37.850583 54.789459,38.666222 55.83437,39.23566 C 54.140746,40.715733 50.093061,40.12158 51.562461,43.76212 C 51.004096,46.980523 52.486847,50.037723 55.670614,50.54595 C 53.547788,53.782616 48.41793,50.035495 49.349973,46.519692 C 50.339877,43.686471 48.78131,40.671166 48.467256,38.48357 C 51.099926,37.413599 47.886512,33.32283 51.660156,34.624046 z M 69.859375,43.553734 C 68.966918,45.911557 71.822794,50.463872 68.108124,51.070944 C 66.216214,47.702306 70.259746,41.190256 64.911775,40.046145 C 60.222418,40.285904 60.439194,45.757728 61.367942,48.953683 C 60.705448,53.064855 
57.788626,49.900134 58.838379,47.289738 C 58.969709,43.381174 59.006437,39.455087 58.607404,35.565714 C 59.356025,31.632413 62.368269,34.68013 61.01352,37.194316 C 60.38417,39.302538 61.469087,40.653476 62.996248,38.474829 C 66.202089,36.826154 70.863269,39.826451 69.859375,43.553734 z M 85.410156,44.374046 C 83.244849,47.905533 76.447085,42.456344 75.976013,47.444052 C 76.913541,51.724548 83.275324,48.726196 84.393639,50.133773 C 82.109855,53.525123 76.421339,51.860111 74.285335,49.01336 C 71.258247,44.729984 74.614013,37.166516 80.254289,37.96756 C 83.286958,38.284495 85.833914,41.310745 85.410156,44.374046 z M 83.253906,43.741234 C 84.431319,39.039614 74.594812,38.687325 76.291886,43.335226 C 78.284783,44.796048 81.032856,43.090943 83.253906,43.741234 z M 96.554688,40.366234 C 93.290612,38.6882 90.622217,42.519635 90.728522,45.492665 C 90.881925,47.333676 92.330286,52.144465 89.028751,50.905988 C 88.95673,46.763963 88.353312,42.447207 89.31721,38.336643 C 91.040471,38.503437 92.207514,40.668181 93.421468,38.208298 C 94.902478,37.44973 97.690944,38.263668 96.554688,40.366234 z " />
<path
style="fill:#ff0000"
nodetypes="ccccccccccccccccccccccccccccccccccccccccccccccccccccc"
id="flowRoot1898"
d="M 17.026327,63.789847 C 0.7506376,64.058469 13.88279,66.387154 13.113883,69.323258 C 8.0472417,70.287093 3.5936285,63.565714 6.8090451,59.370548 C 8.7591553,55.717791 15.269922,55.198361 16.902068,59.393261 C 17.532581,60.758947 17.628237,62.396589 17.026327,63.789847 z M 15.306463,62.656109 C 18.852566,58.713773 7.6543584,56.609143 10.765803,61.304742 C 12.124789,62.217715 13.961359,61.705342 15.306463,62.656109 z M 31.307931,62.391383 C 27.130518,63.524026 24.669863,68.663004 27.470717,72.229472 C 25.946657,74.052316 24.253697,71.076237 24.857281,69.636909 C 23.737444,67.038428 17.399862,72.254246 19.386636,68.888657 C 23.159719,67.551193 22.398496,63.711301 22.06067,60.848671 C 24.064085,60.375294 24.370376,65.772689 27.167918,63.326048 C 28.350126,62.546369 29.927362,61.067531 31.307931,62.391383 z M 37.66875,70.598623 C 33.467314,66.62264 32.517064,77.972723 37.30626,74.466636 C 38.742523,73.853608 40.55904,70.38932 37.66875,70.598623 z M 41.677321,70.973131 C 42.340669,75.308182 36.926157,78.361257 33.331921,76.223155 C 29.43435,74.893988 30.618698,67.677232 35.003806,68.567885 C 37.137393,70.592854 42.140265,67.002221 37.656192,66.290007 C 35.242233,65.914214 35.166503,62.640757 38.036954,63.926404 C 40.847923,64.744926 43.227838,68.124735 41.677321,70.973131 z M 62.379099,76.647079 C 62.007404,78.560417 61.161437,84.034535 58.890565,82.010019 C 59.769679,79.039958 62.536382,72.229115 56.947899,72.765789 C 53.790416,73.570863 54.908257,80.968388 51.529286,79.496859 C 51.707831,76.559817 55.858125,71.896837 50.8321,70.678504 C 45.898113,69.907818 47.485944,75.735824 45.286883,78.034703 C 42.916393,76.333396 45.470823,71.647155 46.624124,69.414735 C 50.919507,67.906486 63.618534,70.878704 62.379099,76.647079 z M 66.426447,83.84905 C 67.616398,85.777591 62.114624,94.492698 62.351124,90.31711 C 63.791684,86.581961 65.730376,78.000636 67.391891,74.85575 C 71.027815,73.781175 76.383067,75.350289 76.591972,79.751898 C 77.048545,83.793048 73.066803,88.429945 
68.842187,86.765936 C 67.624386,86.282034 66.56741,85.195132 66.426447,83.84905 z M 74.086569,81.803435 C 76.851893,78.050524 69.264402,74.310256 67.560734,78.378191 C 65.893402,80.594099 67.255719,83.775746 69.700555,84.718558 C 72.028708,85.902224 73.688639,83.888662 74.086569,81.803435 z M 82.318799,73.124577 C 84.30523,75.487059 81.655015,88.448086 78.247183,87.275736 C 78.991935,82.387828 81.291029,77.949394 82.318799,73.124577 z M 95.001985,87.684695 C 78.726298,87.953319 91.858449,90.281999 91.089542,93.218107 C 86.0229,94.18194 81.569287,87.460562 84.784701,83.265394 C 86.734814,79.612637 93.245582,79.09321 94.877729,83.28811 C 95.508245,84.653796 95.603892,86.291438 95.001985,87.684695 z M 93.282122,86.550957 C 96.828223,82.608621 85.630017,80.503993 88.741461,85.199592 C 90.100447,86.112565 91.937018,85.600192 93.282122,86.550957 z " />
</g>
</svg>

After

Width:  |  Height:  |  Size: 9.8 KiB

BIN
tests/normal/pdf-sample.pdf Normal file

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,68 @@
{\rtf1\ansi\ansicpg1252\uc1\deff0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deflang1033\deflangfe1033{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;}
{\f2\fmodern\fcharset0\fprq1{\*\panose 02070309020205020404}Courier New;}{\f3\froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}{\f10\fnil\fcharset2\fprq2{\*\panose 05000000000000000000}Wingdings;}
{\f121\froman\fcharset238\fprq2 Times New Roman CE;}{\f122\froman\fcharset204\fprq2 Times New Roman Cyr;}{\f124\froman\fcharset161\fprq2 Times New Roman Greek;}{\f125\froman\fcharset162\fprq2 Times New Roman Tur;}
{\f126\froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f127\froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f128\froman\fcharset186\fprq2 Times New Roman Baltic;}{\f129\froman\fcharset163\fprq2 Times New Roman (Vietnamese);}
{\f131\fswiss\fcharset238\fprq2 Arial CE;}{\f132\fswiss\fcharset204\fprq2 Arial Cyr;}{\f134\fswiss\fcharset161\fprq2 Arial Greek;}{\f135\fswiss\fcharset162\fprq2 Arial Tur;}{\f136\fswiss\fcharset177\fprq2 Arial (Hebrew);}
{\f137\fswiss\fcharset178\fprq2 Arial (Arabic);}{\f138\fswiss\fcharset186\fprq2 Arial Baltic;}{\f139\fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f141\fmodern\fcharset238\fprq1 Courier New CE;}{\f142\fmodern\fcharset204\fprq1 Courier New Cyr;}
{\f144\fmodern\fcharset161\fprq1 Courier New Greek;}{\f145\fmodern\fcharset162\fprq1 Courier New Tur;}{\f146\fmodern\fcharset177\fprq1 Courier New (Hebrew);}{\f147\fmodern\fcharset178\fprq1 Courier New (Arabic);}
{\f148\fmodern\fcharset186\fprq1 Courier New Baltic;}{\f149\fmodern\fcharset163\fprq1 Courier New (Vietnamese);}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;
\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\stylesheet{
\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext0 Normal;}{\s1\ql \li0\ri0\sb240\sa60\keepn\widctlpar\aspalpha\aspnum\faauto\outlinelevel0\adjustright\rin0\lin0\itap0
\b\f1\fs32\lang1033\langfe1033\kerning32\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext0 \styrsid2294299 heading 1;}{\*\cs10 \additive \ssemihidden Default Paragraph Font;}{\*
\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv
\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1024\langfe1024\cgrid\langnp1024\langfenp1024 \snext11 \ssemihidden Normal Table;}{\*\ts15\tsrowd\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10
\trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv
\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1024\langfe1024\cgrid\langnp1024\langfenp1024 \sbasedon11 \snext15 \styrsid2294299 Table Grid;}{
\s16\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext16 \ssemihidden \styrsid1792631 footnote text;}{\*\cs17 \additive \super
\sbasedon10 \ssemihidden \styrsid1792631 footnote reference;}}{\*\latentstyles\lsdstimax156\lsdlockeddef0}{\*\listtable{\list\listtemplateid-767292450\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360
\levelindent0{\leveltext\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li720\jclisttab\tx720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext
\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li1440\jclisttab\tx1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698693
\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0 \fi-360\li2160\jclisttab\tx2160\lin2160 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers
;}\f3\fbias0 \fi-360\li2880\jclisttab\tx2880\lin2880 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li3600
\jclisttab\tx3600\lin3600 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698693\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0 \fi-360\li4320\jclisttab\tx4320\lin4320 }
{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li5040\jclisttab\tx5040\lin5040 }{\listlevel\levelnfc23
\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li5760\jclisttab\tx5760\lin5760 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0
\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698693\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0 \fi-360\li6480\jclisttab\tx6480\lin6480 }{\listname ;}\listid687222349}}{\*\listoverridetable{\listoverride\listid687222349
\listoverridecount0\ls1}}{\*\rsidtbl \rsid1792631\rsid2294299}{\*\generator Microsoft Word 11.0.6113;}{\info{\title This is a test RTF}{\author Nate}{\operator Nate}{\version2}}\widowctrl\ftnbj\aenddoc\noxlattoyen\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\hyphcaps0\formshade\horzdoc\dgmargin\dghspace180\dgvspace180\dghorigin1800\dgvorigin1440
\dghshow1\dgvshow1\jexpand\viewkind1\viewscale80\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\allowfieldendsel\wrppunct
\asianbrkrule\rsidroot2294299\newtblstyruls\nogrowautofit \fet0{\*\ftnsep \pard\plain \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid1792631 \chftnsep
\par }}{\*\ftnsepc \pard\plain \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid1792631 \chftnsepc
\par }}{\*\aftnsep \pard\plain \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid1792631 \chftnsep
\par }}{\*\aftnsepc \pard\plain \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid1792631 \chftnsepc
\par }}\sectd \linex0\endnhere\sectlinegrid360\sectdefaultcl\sftnbj {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}
{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang
{\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\pard\plain
\s1\ql \li0\ri0\sb240\sa60\keepn\widctlpar\aspalpha\aspnum\faauto\outlinelevel0\adjustright\rin0\lin0\itap0\pararsid2294299 \b\f1\fs32\lang1033\langfe1033\kerning32\cgrid\langnp1033\langfenp1033 {\insrsid2294299 This is a test RTF
\par }\pard\plain \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid2294299 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid2294299 Hi! I\rquote m a test file. This is some }{\b\insrsid2294299 bold}{
\insrsid2294299 text, and some }{\i\insrsid2294299 italic}{\insrsid2294299 text, as well as some }{\ul\insrsid2294299 underline}{\insrsid2294299 text. And a bit of }{\v\insrsid2294299\charrsid2294299 hidden}{\insrsid2294299 text. So we\rquote
re going to end this paragraph here and go on to a nice little list:
\par
\par {\listtext\pard\plain\f3\insrsid2294299 \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \ql \fi-360\li720\ri0\widctlpar\jclisttab\tx720\aspalpha\aspnum\faauto\ls1\adjustright\rin0\lin720\itap0\pararsid2294299 {\insrsid2294299 Item 1
\par {\listtext\pard\plain\f3\insrsid2294299 \loch\af3\dbch\af0\hich\f3 \'b7\tab}Item 2
\par {\listtext\pard\plain\f3\insrsid2294299 \loch\af3\dbch\af0\hich\f3 \'b7\tab}Item 3
\par {\listtext\pard\plain\f3\insrsid2294299 \loch\af3\dbch\af0\hich\f3 \'b7\tab}Item 4
\par }\pard \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid2294299 {\insrsid2294299
\par And now comes a fun table:
\par
\par }\trowd \irow0\irowband0\ts15\trgaph108\trleft-108\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10
\trftsWidth1\trftsWidthB3\trautofit1\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tbllkhdrrows\tbllklastrow\tbllkhdrcols\tbllklastcol \clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10
\cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx2844\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx5796\clvertalt\clbrdrt\brdrs\brdrw10
\clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx8748\pard\plain \ql \li0\ri0\widctlpar\intbl\aspalpha\aspnum\faauto\adjustright\rin0\lin0\pararsid2294299\yts15
\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid2294299 Cell 1\cell Cell 2
\par More in cell 2\cell Cell 3\cell }\pard\plain \ql \li0\ri0\widctlpar\intbl\aspalpha\aspnum\faauto\adjustright\rin0\lin0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid2294299 \trowd \irow0\irowband0\ts15\trgaph108\trleft-108\trbrdrt
\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10
\trftsWidth1\trftsWidthB3\trautofit1\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tbllkhdrrows\tbllklastrow\tbllkhdrcols\tbllklastcol \clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10
\cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx2844\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx5796\clvertalt\clbrdrt\brdrs\brdrw10
\clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx8748\row }\pard\plain \ql \li0\ri0\widctlpar\intbl\aspalpha\aspnum\faauto\adjustright\rin0\lin0\pararsid2294299\yts15
\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid2294299 Next row\cell Next row \cell Next row\cell }\pard\plain \ql \li0\ri0\widctlpar\intbl\aspalpha\aspnum\faauto\adjustright\rin0\lin0
\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\insrsid2294299 \trowd \irow1\irowband1\lastrow \ts15\trgaph108\trleft-108\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv
\brdrs\brdrw10 \trftsWidth1\trftsWidthB3\trautofit1\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tbllkhdrrows\tbllklastrow\tbllkhdrcols\tbllklastcol \clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr
\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx2844\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx5796\clvertalt\clbrdrt
\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth2952\clshdrawnil \cellx8748\row }\pard \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid2294299 {
\insrsid2294299
\par A page break:
\par \page And here we\rquote re on the next page.}{\insrsid1792631 }{\insrsid2294299
\par }{\insrsid1792631 This para has a }{\cs17\super\insrsid1792631 \chftn {\footnote \pard\plain \s16\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\cs17\super\insrsid1792631
\chftn }{\insrsid1792631 This is the actual content of the footnote.}}}{\insrsid1792631 footnote.
\par And here\rquote s yet another paragraph. }{\insrsid1792631\charrsid2294299
\par }}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1 +0,0 @@
geneve_1564.pdf

Binary file not shown.

View File

@ -1,181 +0,0 @@
#VRML V1.0 ascii
DEF Ez3d_Scene Separator {
DEF Ez3d_Viewer Switch {
whichChild -3
DEF Title Info {
string ""
}
DEF Viewer Info {
string "walk"
}
DEF BackgroundColor Info {
string "0.000000 0.000000 0.000000"
}
DEF Cameras Switch {
whichChild 0
PerspectiveCamera {
position 1.12948 2.23403 9.88775
orientation 0.192413 -0.894646 -0.403219 6.1811
focalDistance 9.51932
heightAngle 0.683577
}
}
}
DEF Ez3d_Environment Switch {
whichChild -3
}
DEF Ez3d_Objects Switch {
whichChild -3
DEF Cube001 Separator {
Transform {
scaleFactor 1.76681 0.168973 1.76681
}
Texture2 {
filename "brick.gif"
wrapS REPEAT
wrapT REPEAT
}
DEF Ez3d_Material Material {
ambientColor 0.0375 0.0375 0.0375
diffuseColor 0.425 0.425 0.425
specularColor 0.807547 0.807547 0.807547
shininess 0.5
transparency 0
}
ShapeHints {
vertexOrdering COUNTERCLOCKWISE
shapeType UNKNOWN_SHAPE_TYPE
creaseAngle 0.523599
}
DEF Ez3d_Cube001 Cube {
}
}
DEF Cylinder001 Separator {
Transform {
translation 0.0806677 1.11004 4.76837e-007
scaleFactor 0.176417 0.977561 0.156127
center -5.52615e-009 4.58545e-008 0
}
Texture2 {
filename "oak.gif"
wrapS REPEAT
wrapT REPEAT
}
ShapeHints {
vertexOrdering COUNTERCLOCKWISE
shapeType UNKNOWN_SHAPE_TYPE
creaseAngle 0.523599
}
DEF Ez3d_Cylinder001 Cylinder {
}
}
DEF Group002 Separator {
Transform {
translation 0.0497642 2.50354 -0.281726
center 0.174201 0.111916 0.106615
}
DEF Sphere001_2 Separator {
Transform {
translation -0.0497642 -0.0265024 -0.479769
}
DEF Ez3d_Material Material {
ambientColor 0.0908158 0.776699 0.00823493
diffuseColor 0.0935403 0.8 0.00848198
specularColor 0.114655 0.980583 0.0103966
shininess 0.184466
transparency 0.485437
}
ShapeHints {
vertexOrdering COUNTERCLOCKWISE
shapeType UNKNOWN_SHAPE_TYPE
creaseAngle 0.523599
}
DEF Ez3d_Sphere001_2 Sphere {
}
}
DEF Sphere001 Separator {
Transform {
translation -0.0497642 -0.178993 0.25727
}
DEF Ez3d_Material Material {
ambientColor 0.0908158 0.776699 0.00823493
diffuseColor 0.0935403 0.8 0.00848198
specularColor 0.114655 0.980583 0.0103966
shininess 0.184466
transparency 0.485437
}
ShapeHints {
vertexOrdering COUNTERCLOCKWISE
shapeType UNKNOWN_SHAPE_TYPE
creaseAngle 0.523599
}
DEF Ez3d_Sphere001 Sphere {
}
}
DEF Group001 Separator {
Transform {
translation 0.0995283 0.205495 0.222499
center 0.0746732 -0.077734 -0.115884
}
DEF Sphere001_4 Separator {
Transform {
translation 0.557986 0.19733 0.231768
}
DEF Ez3d_Material Material {
ambientColor 0.0908158 0.776699 0.00823493
diffuseColor 0.0935403 0.8 0.00848198
specularColor 0.114655 0.980583 0.0103966
shininess 0.184466
transparency 0.485437
}
ShapeHints {
vertexOrdering COUNTERCLOCKWISE
shapeType UNKNOWN_SHAPE_TYPE
creaseAngle 0.523599
}
DEF Ez3d_Sphere001_4 Sphere {
}
}
DEF Sphere001_1 Separator {
Transform {
translation -0.149346 -0.352797 0.470501
}
DEF Ez3d_Material Material {
ambientColor 0.0908158 0.776699 0.00823493
diffuseColor 0.0935403 0.8 0.00848198
specularColor 0.114655 0.980583 0.0103966
shininess 0.184466
transparency 0.485437
}
ShapeHints {
vertexOrdering COUNTERCLOCKWISE
shapeType UNKNOWN_SHAPE_TYPE
creaseAngle 0.523599
}
DEF Ez3d_Sphere001_1 Sphere {
}
}
DEF Sphere001_3 Separator {
Transform {
translation -0.40864 0.155468 -0.702269
}
DEF Ez3d_Material Material {
ambientColor 0.0908158 0.776699 0.00823493
diffuseColor 0.0935403 0.8 0.00848198
specularColor 0.114655 0.980583 0.0103966
shininess 0.184466
transparency 0.485437
}
ShapeHints {
vertexOrdering COUNTERCLOCKWISE
shapeType UNKNOWN_SHAPE_TYPE
creaseAngle 0.523599
}
DEF Ez3d_Sphere001_3 Sphere {
}
}
}
}
}
}

View File

@ -1 +0,0 @@
blah

View File

@ -1 +0,0 @@
This is a test.

View File

@ -1 +0,0 @@
This is a test.

Binary file not shown.

View File

@ -2,57 +2,130 @@
# -*- coding: utf-8 -*-
import os
import shutil
import unittest.mock as mock
import pytest
import yaml
from tests.logging import save_logs
try:
from bin.filecheck import KittenGroomerFileCheck, File, main
from bin.filecheck import KittenGroomerFileCheck, File, GroomerLogger
NODEPS = False
except ImportError:
NODEPS = True
pytestmark = pytest.mark.skipif(NODEPS, reason="Dependencies aren't installed")
fixture = pytest.fixture
skip = pytest.mark.skip
skipif_nodeps = pytest.mark.skipif(NODEPS,
reason="Dependencies aren't installed")
parametrize = pytest.mark.parametrize
@skipif_nodeps
class TestSystem:
@fixture
def valid_groomer(self):
src_path = os.path.join(os.getcwd(), 'tests/src_valid')
dst_path = self.make_dst_dir_path(src_path)
return KittenGroomerFileCheck(src_path, dst_path, debug=True)
@fixture
def invalid_groomer(self):
src_path = os.path.join(os.getcwd(), 'tests/src_invalid')
dst_path = self.make_dst_dir_path(src_path)
return KittenGroomerFileCheck(src_path, dst_path, debug=True)
def make_dst_dir_path(self, src_dir_path):
dst_path = src_dir_path + '_dst'
shutil.rmtree(dst_path, ignore_errors=True)
os.makedirs(dst_path, exist_ok=True)
return dst_path
def test_filecheck_src_valid(self, valid_groomer):
valid_groomer.run()
test_description = "filecheck_valid"
save_logs(valid_groomer, test_description)
def test_filecheck_src_invalid(self, invalid_groomer):
invalid_groomer.run()
test_description = "filecheck_invalid"
save_logs(invalid_groomer, test_description)
NORMAL_FILES_PATH = 'tests/normal/'
DANGEROUS_FILES_PATH = 'tests/dangerous/'
UNCATEGORIZED_FILES_PATH = 'tests/uncategorized'
CATALOG_PATH = 'tests/file_catalog.yaml'
class TestFileHandling:
    """Placeholder for tests that target individual file cases."""

    def test_autorun(self):
        """Stub: run on a single autorun file and confirm it is flagged dangerous.

        TODO: build out this and other methods for individual file cases.
        """
        return None
class SampleFile:
    """A sample file on disk paired with the verdict expected for it.

    Instances carry plain data:
      * path: the path exactly as it was passed in.
      * filename: the final component of `path`.
      * exp_dangerous: the value `is_dangerous` is expected to have once the
        file has been processed.
      * xfail: whether the test case for this file is expected to fail.
    """

    def __init__(self, path, exp_dangerous, xfail=False):
        """Store `path`, derive `filename` from it, and record expectations.

        `xfail` defaults to False so that every instance has the attribute
        even when the caller never sets it afterwards.
        """
        self.path = path
        self.filename = os.path.basename(path)
        self.exp_dangerous = exp_dangerous
        self.xfail = xfail
def gather_sample_files():
    """Return the sample files for both catalog sections as one list.

    Entries from the 'normal' section come first (expected not dangerous),
    followed by entries from the 'dangerous' section (expected dangerous).
    """
    catalog = read_file_catalog()
    # Look up both sections before touching the sample directories.
    normal_entries, dangerous_entries = catalog['normal'], catalog['dangerous']
    normal_samples = create_sample_files(
        normal_entries, NORMAL_FILES_PATH, exp_dangerous=False)
    dangerous_samples = create_sample_files(
        dangerous_entries, DANGEROUS_FILES_PATH, exp_dangerous=True)
    return normal_samples + dangerous_samples
def read_file_catalog():
    """Parse the YAML file at CATALOG_PATH and return the loaded object."""
    catalog_location = os.path.abspath(CATALOG_PATH)
    with open(catalog_location) as stream:
        return yaml.safe_load(stream)
def create_sample_files(file_catalog, dir_path, exp_dangerous):
    """Build a `SampleFile` for every file found directly in `dir_path`.

    Files named in `file_catalog` come first, ordered by filename, and take
    their `xfail` flag from the catalog entry (default False). Files present
    in the directory but missing from the catalog follow, ordered by path,
    with `xfail` set to False.

    Args:
        file_catalog: mapping of filename -> attribute mapping. Either the
            mapping itself or an individual entry may be None, which is what
            an empty YAML section or entry parses to.
        dir_path: directory containing the sample files.
        exp_dangerous: expected verdict recorded on every created sample.

    Raises:
        FileNotFoundError: a cataloged filename is not a file in `dir_path`.
    """
    sample_files = []
    dir_files = set_of_files(dir_path)
    # Sorted to make the test cases occur in a consistent order.
    for filename, file_dict in sorted((file_catalog or {}).items()):
        full_path = os.path.abspath(os.path.join(dir_path, filename))
        # Only the set removal may signal a missing file; keeping the try
        # body this small stops unrelated KeyErrors from being misreported.
        try:
            dir_files.remove(full_path)
        except KeyError:
            raise FileNotFoundError("{} could not be found".format(filename))
        newfile = SampleFile(full_path, exp_dangerous)
        newfile.xfail = (file_dict or {}).get('xfail', False)
        sample_files.append(newfile)
    # Whatever is left was not cataloged. Set iteration order can differ
    # between interpreter runs, so sort to keep the generated cases stable.
    for file_path in sorted(dir_files):
        newfile = SampleFile(file_path, exp_dangerous)
        newfile.xfail = False
        sample_files.append(newfile)
    return sample_files
def set_of_files(dir_path):
    """Return the set of full paths of the files directly inside `dir_path`.

    Paths are built from the absolute form of `dir_path`; entries for which
    `os.path.isfile` is false (such as sub-directories) are left out.
    """
    root = os.path.abspath(dir_path)
    candidates = (os.path.join(root, entry) for entry in os.listdir(root))
    return {candidate for candidate in candidates if os.path.isfile(candidate)}
def get_filename(sample_file):
    """Return the final path component of `sample_file.path`."""
    _parent, name = os.path.split(sample_file.path)
    return name
@fixture(scope='module')
def src_dir_path(tmpdir_factory):
    """Module-scoped fixture: path string of a temp dir made with basename 'src'."""
    src_dir = tmpdir_factory.mktemp('src')
    return src_dir.strpath
@fixture(scope='module')
def dest_dir_path(tmpdir_factory):
    """Module-scoped fixture: path string of a temp dir made with basename 'dest'."""
    dest_dir = tmpdir_factory.mktemp('dest')
    return dest_dir.strpath
@fixture
def groomer(dest_dir_path):
    """Fixture: a debug-mode KittenGroomerFileCheck writing to `dest_dir_path`.

    The current working directory is passed as a stand-in source path.
    """
    return KittenGroomerFileCheck(os.getcwd(), dest_dir_path, debug=True)
@fixture
def mock_logger(dest_dir_path):
    """Fixture: a MagicMock constrained to the GroomerLogger interface."""
    logger_double = mock.MagicMock(spec=GroomerLogger)
    return logger_double
@parametrize(
    argnames="sample_file",
    argvalues=gather_sample_files(),
    ids=get_filename,
)
def test_sample_files(mock_logger, sample_file, groomer, dest_dir_path):
    """Process one sample file and compare its verdict with the expected one.

    Samples flagged `xfail` are reported as expected failures before any
    processing takes place.
    """
    if sample_file.xfail:
        pytest.xfail(reason='Marked xfail in file catalog')
    destination = os.path.join(dest_dir_path, sample_file.filename)
    processed = File(sample_file.path, destination, mock_logger)
    groomer.process_file(processed)
    assert processed.is_dangerous == sample_file.exp_dangerous
def test_uncategorized(tmpdir):
    """Run a debug-mode groomer over the uncategorized samples directory.

    The test makes no assertions; it only requires `run()` to complete.
    """
    uncategorized_dir = os.path.abspath(UNCATEGORIZED_FILES_PATH)
    KittenGroomerFileCheck(uncategorized_dir, tmpdir.strpath, debug=True).run()

View File

@ -1,5 +1,10 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
from datetime import datetime
import datetime
import pytest
def save_logs(groomer, test_description):

View File

@ -2,249 +2,320 @@
# -*- coding: utf-8 -*-
import os
import unittest.mock as mock
import pytest
from kittengroomer import FileBase, KittenGroomerBase
from kittengroomer.helpers import ImplementationRequired
skip = pytest.mark.skip
xfail = pytest.mark.xfail
fixture = pytest.fixture
# FileBase
class TestFileBase:
@fixture
def source_file(self):
return 'tests/src_valid/blah.conf'
@fixture(scope='class')
def src_dir_path(self, tmpdir_factory):
return tmpdir_factory.mktemp('src').strpath
@fixture(scope='class')
def dest_dir_path(self, tmpdir_factory):
return tmpdir_factory.mktemp('dest').strpath
@fixture
def dest_file(self):
return 'tests/dst/blah.conf'
@fixture
def generic_conf_file(self, source_file, dest_file):
return FileBase(source_file, dest_file)
@fixture
def symlink_file(self, tmpdir):
def tmpfile_path(self, tmpdir):
file_path = tmpdir.join('test.txt')
file_path.write('testing')
file_path = file_path.strpath
symlink_path = tmpdir.join('symlinked.txt')
return file_path.strpath
@fixture
def symlink_file_path(self, tmpdir, tmpfile_path):
symlink_path = tmpdir.join('symlinked')
symlink_path = symlink_path.strpath
os.symlink(file_path, symlink_path)
return FileBase(symlink_path, symlink_path)
os.symlink(tmpfile_path, symlink_path)
return symlink_path
@fixture
def temp_file(self, tmpdir):
file_path = tmpdir.join('test.txt')
file_path.write('testing')
file_path = file_path.strpath
return FileBase(file_path, file_path)
def text_file(self):
with mock.patch(
'kittengroomer.helpers.magic.from_file',
return_value='text/plain'
):
src_path = 'src/test.txt'
dst_path = 'dst/test.txt'
file = FileBase(src_path, dst_path)
return file
@fixture
def temp_file_no_ext(self, tmpdir):
file_path = tmpdir.join('test')
file_path.write('testing')
file_path = file_path.strpath
return FileBase(file_path, file_path)
# Constructor behavior
@fixture
def file_marked_dangerous(self, generic_conf_file):
generic_conf_file.make_dangerous()
return generic_conf_file
@mock.patch('kittengroomer.helpers.magic')
def test_init_identify_filename(self, mock_libmagic):
"""Init should identify the filename correctly for src_path."""
src_path = 'src/test.txt'
dst_path = 'dst/test.txt'
file = FileBase(src_path, dst_path)
assert file.filename == 'test.txt'
@fixture
def file_marked_unknown(self, generic_conf_file):
generic_conf_file.make_unknown()
return generic_conf_file
@fixture
def file_marked_binary(self, generic_conf_file):
generic_conf_file.make_binary()
return generic_conf_file
@fixture(params=[
FileBase.make_dangerous,
FileBase.make_unknown,
FileBase.make_binary
])
def file_marked_all_parameterized(self, request, generic_conf_file):
request.param(generic_conf_file)
return generic_conf_file
# What are the various things that can go wrong with file paths? We should have fixtures for them
# What should FileBase do if it's given a path that isn't a file (doesn't exist or is a dir)? Currently magic throws an exception
# We should probably catch every time that happens and tell the user explicitly what happened (and maybe put it in the log)
def test_create_broken(self, tmpdir):
with pytest.raises(TypeError):
FileBase()
with pytest.raises(FileNotFoundError):
FileBase('', '')
with pytest.raises(IsADirectoryError):
FileBase(tmpdir.strpath, tmpdir.strpath)
# TODO: are there other cases here? path to a file that doesn't exist? permissions?
def test_init(self, generic_conf_file):
generic_conf_file
def test_extension_uppercase(self, tmpdir):
file_path = tmpdir.join('TEST.TXT')
file_path.write('testing')
file_path = file_path.strpath
file = FileBase(file_path, file_path)
@mock.patch('kittengroomer.helpers.magic')
def test_init_identify_extension(self, mock_libmagic):
    """The extension attribute should hold the extension of src_path."""
    groomed = FileBase('src/test.txt', 'dst/test.txt')
    assert groomed.extension == '.txt'
def test_mimetypes(self, generic_conf_file):
    """The generic conf file should report text/plain and its two parts."""
    conf = generic_conf_file
    assert conf.mimetype == 'text/plain'
    assert conf.main_type == 'text'
    assert conf.sub_type == 'plain'
    assert conf.has_mimetype
# Need to test something without a mimetype
# Need to test something that's a directory
# Need to test something that causes the unicode exception
@mock.patch('kittengroomer.helpers.magic')
def test_init_uppercase_extension(self, mock_libmagic):
    """An uppercase extension in src_path should be stored in lowercase."""
    groomed = FileBase('src/TEST.TXT', 'dst/TEST.TXT')
    assert groomed.extension == '.txt'
def test_has_mimetype_no_main_type(self, generic_conf_file):
    """has_mimetype should be False once main_type is set to an empty string."""
    conf = generic_conf_file
    conf.main_type = ''
    assert conf.has_mimetype is False
@mock.patch('kittengroomer.helpers.magic')
def test_has_extension_true(self, mock_libmagic):
    """has_extension should be True when src_path ends in an extension."""
    groomed = FileBase('src/test.txt', 'dst/test.txt')
    assert groomed.has_extension is True
def test_has_mimetype_no_sub_type(self, generic_conf_file):
    """has_mimetype should be False once sub_type is set to an empty string."""
    conf = generic_conf_file
    conf.sub_type = ''
    assert conf.has_mimetype is False
@mock.patch('kittengroomer.helpers.magic')
def test_has_extension_false(self, mock_libmagic):
    """has_extension should be False when src_path has no extension."""
    groomed = FileBase('src/test', 'dst/test')
    assert groomed.has_extension is False
def test_has_extension(self, temp_file, temp_file_no_ext):
    """has_extension should tell a file with an extension from one without."""
    with_ext, without_ext = temp_file, temp_file_no_ext
    assert with_ext.has_extension is True
    # Printed so the detected extension shows up in captured output.
    print(without_ext.extension)
    assert without_ext.has_extension is False
def test_init_file_doesnt_exist(self):
    """Constructing FileBase with empty paths should raise FileNotFoundError."""
    missing_path = ''
    with pytest.raises(FileNotFoundError):
        FileBase(missing_path, missing_path)
def test_set_property(self, generic_conf_file):
    """A property that was set is returned; an unset one yields None."""
    conf = generic_conf_file
    conf.set_property('test', True)
    stored = conf.get_property('test')
    assert stored is True
    never_set = conf.get_property('wrong')
    assert never_set is None
def test_init_srcpath_is_directory(self, tmpdir):
    """Constructing FileBase on a directory path should raise IsADirectoryError."""
    directory = tmpdir.strpath
    with pytest.raises(IsADirectoryError):
        FileBase(directory, directory)
def test_marked_dangerous(self, file_marked_all_parameterized):
    """make_dangerous should take effect whatever marking the file already has."""
    marked = file_marked_all_parameterized
    marked.make_dangerous()
    assert marked.is_dangerous is True
# Should work regardless of weird paths??
# Should check file path alteration behavior as well
@mock.patch('kittengroomer.helpers.magic')
def test_init_symlink(self, mock_libmagic, symlink_file_path):
    """A symlink source should get the mimetype 'inode/symlink'."""
    linked = FileBase(symlink_file_path, '')
    assert linked.mimetype == 'inode/symlink'
def test_generic_dangerous(self, generic_conf_file):
    """is_dangerous should flip from False to True after make_dangerous()."""
    conf = generic_conf_file
    assert conf.is_dangerous is False
    conf.make_dangerous()
    assert conf.is_dangerous is True
@mock.patch('kittengroomer.helpers.magic')
def test_is_symlink_attribute(self, mock_libmagic, symlink_file_path):
    """is_symlink should be True for a FileBase built on a symlink path."""
    linked = FileBase(symlink_file_path, '')
    assert linked.is_symlink is True
def test_has_symlink(self, tmpdir):
    """is_symlink should be False for a regular file and True for a link to it."""
    target = tmpdir.join('test.txt')
    target.write('testing')
    target_path = target.strpath
    link_path = tmpdir.join('symlinked.txt').strpath
    os.symlink(target_path, link_path)
    regular = FileBase(target_path, target_path)
    linked = FileBase(link_path, link_path)
    assert regular.is_symlink is False
    assert linked.is_symlink is True
def test_init_mimetype_attribute_assigned_correctly(self):
    """The value returned by the patched magic.from_file should become
    the mimetype attribute."""
    libmagic_patch = mock.patch('kittengroomer.helpers.magic.from_file',
                                return_value='text/plain')
    with libmagic_patch:
        groomed = FileBase('', '')
    assert groomed.mimetype == 'text/plain'
def test_has_symlink_fixture(self, symlink_file):
    """The symlink_file fixture should report is_symlink as True."""
    linked = symlink_file
    assert linked.is_symlink is True
def test_maintype_and_subtype_attributes(self):
    """A 'text/plain' mimetype should give maintype 'text' and subtype 'plain'."""
    libmagic_patch = mock.patch('kittengroomer.helpers.magic.from_file',
                                return_value='text/plain')
    with libmagic_patch:
        groomed = FileBase('', '')
    assert groomed.maintype == 'text'
    assert groomed.subtype == 'plain'
def test_generic_make_unknown(self, generic_conf_file):
    """is_unknown should start False and become truthy after make_unknown()."""
    conf = generic_conf_file
    assert conf.is_unknown is False
    conf.make_unknown()
    assert conf.is_unknown
# given a FileBase object with no marking, should do the right things
def test_has_mimetype_no_full_type(self):
    """has_mimetype should be False when libmagic reports just 'data'."""
    libmagic_patch = mock.patch('kittengroomer.helpers.magic.from_file',
                                return_value='data')
    with libmagic_patch:
        groomed = FileBase('', '')
    assert groomed.has_mimetype is False
def test_marked_make_unknown(self, file_marked_all_parameterized):
    """make_unknown should keep an unknown file unknown and leave files
    carrying another marking not unknown."""
    marked = file_marked_all_parameterized
    if not marked.is_unknown:
        # Already marked some other way: make_unknown must not take effect.
        assert marked.is_unknown is False
        marked.make_unknown()
        assert marked.is_unknown is False
        return
    marked.make_unknown()
    assert marked.is_unknown
# given a FileBase object with an unrecognized marking, should ???
def test_has_mimetype_mimetype_is_none(self):
    """has_mimetype should be False when _determine_mimetype returns None."""
    determine_patch = mock.patch(
        'kittengroomer.helpers.FileBase._determine_mimetype',
        return_value=None)
    with determine_patch:
        groomed = FileBase('', '')
    assert groomed.has_mimetype is False
def test_generic_make_binary(self, generic_conf_file):
    """is_binary should start False and become truthy after make_binary()."""
    conf = generic_conf_file
    assert conf.is_binary is False
    conf.make_binary()
    assert conf.is_binary
# File properties
def test_marked_make_binary(self, file_marked_all_parameterized):
    """make_binary should not take effect on a dangerous file but should
    on the other markings."""
    marked = file_marked_all_parameterized
    # Capture the state before make_binary() runs, as the branch depends on it.
    started_dangerous = bool(marked.is_dangerous)
    marked.make_binary()
    if started_dangerous:
        assert marked.is_binary is False
    else:
        assert marked.is_binary
def test_get_property_doesnt_exist(self, text_file):
    """get_property should return None for a property that was never set."""
    # Renamed with the ``test_`` prefix: under pytest's default discovery
    # rules the unprefixed method was never collected, so it never ran.
    assert text_file.get_property('thing') is None
def test_force_ext_change(self, generic_conf_file):
    """force_ext('.txt') should change both the 'extension' property and
    the extension of dst_path from '.conf' to '.txt'."""
    conf = generic_conf_file
    assert conf.has_extension
    assert conf.get_property('extension') == '.conf'
    _, ext_before = os.path.splitext(conf.dst_path)
    assert ext_before == '.conf'
    conf.force_ext('.txt')
    _, ext_after = os.path.splitext(conf.dst_path)
    assert ext_after == '.txt'
    assert conf.get_property('extension') == '.txt'
# should be able to handle weird paths
def test_get_property_builtin(self, text_file):
    """get_property('is_dangerous') should be False on the text_file fixture."""
    # Renamed with the ``test_`` prefix: under pytest's default discovery
    # rules the unprefixed method was never collected, so it never ran.
    assert text_file.get_property('is_dangerous') is False
def test_force_ext_correct(self, generic_conf_file):
    """force_ext with the extension the file already has should keep
    dst_path's extension and leave the 'force_ext' property unset."""
    conf = generic_conf_file
    assert conf.has_extension
    assert conf.get_property('extension') == '.conf'
    conf.force_ext('.conf')
    _, ext_after = os.path.splitext(conf.dst_path)
    assert ext_after == '.conf'
    assert conf.get_property('force_ext') is None
# shouldn't change a file's extension if it already is right
def test_get_property_user_defined(self, text_file):
    """get_property should return a value placed in _user_defined."""
    # Renamed with the ``test_`` prefix: under pytest's default discovery
    # rules the unprefixed method was never collected, so it never ran.
    text_file._user_defined = {'thing': True}
    assert text_file.get_property('thing') is True
def test_create_metadata_file(self, temp_file):
    """create_metadata_file should return a writable path for a real
    extension and False for an empty one."""
    metadata_path = temp_file.create_metadata_file('.metadata.txt')
    with open(metadata_path, 'w+') as handle:
        handle.write('Have some metadata!')
    # Shouldn't be able to make a metadata file with no extension
    result_without_ext = temp_file.create_metadata_file('')
    assert result_without_ext is False
# if metadata file already exists
# if there is no metadata to write should this work?
def test_set_property_user_defined(self, text_file):
    """A non-default property stored with set_property should be readable
    through get_property."""
    # Renamed with the ``test_`` prefix: under pytest's default discovery
    # rules the unprefixed method was never collected, so it never ran.
    text_file.set_property('thing', True)
    assert text_file.get_property('thing') is True
def test_safe_copy(self, generic_conf_file):
    """Smoke test: safe_copy() on the generic conf file should not raise."""
    conf = generic_conf_file
    conf.safe_copy()
# check that safe copy can handle weird file path inputs
def test_set_property_builtin(self, text_file):
    """Setting 'is_dangerous' through set_property should be visible
    through get_property."""
    # Renamed with the ``test_`` prefix: under pytest's default discovery
    # rules the unprefixed method was never collected, so it never ran.
    text_file.set_property('is_dangerous', True)
    assert text_file.get_property('is_dangerous') is True
def test_add_new_description(self, text_file):
    """A first description should appear alone in 'description_string'."""
    text_file.add_description('thing')
    descriptions = text_file.get_property('description_string')
    assert descriptions == ['thing']
def test_add_description_exists(self, text_file):
    """Adding the same description twice should store it only once."""
    for _ in range(2):
        text_file.add_description('thing')
    descriptions = text_file.get_property('description_string')
    assert descriptions == ['thing']
def test_add_description_not_string(self, text_file):
    """add_description should raise TypeError for a non-string argument."""
    not_a_string = 123
    with pytest.raises(TypeError):
        text_file.add_description(not_a_string)
def test_add_new_error(self, text_file):
    """add_error should record the error under the '_errors' property."""
    text_file.add_error(Exception, 'thing')
    recorded = text_file.get_property('_errors')
    assert recorded == {Exception: 'thing'}
def test_normal_file_mark_dangerous(self, text_file):
    """is_dangerous should be True after make_dangerous()."""
    marked = text_file
    marked.make_dangerous()
    assert marked.is_dangerous is True
def test_normal_file_mark_dangerous_filename_change(self, text_file):
    """make_dangerous should wrap the filename in DANGEROUS_ ... _DANGEROUS."""
    original_name = text_file.filename
    expected_name = 'DANGEROUS_{}_DANGEROUS'.format(original_name)
    text_file.make_dangerous()
    assert text_file.filename == expected_name
def test_normal_file_mark_dangerous_add_description(self, text_file):
    """A description passed to make_dangerous should be stored on the file."""
    text_file.make_dangerous('thing')
    descriptions = text_file.get_property('description_string')
    assert descriptions == ['thing']
def test_dangerous_file_mark_dangerous(self, text_file):
    """A file marked dangerous twice should still be dangerous."""
    for _ in range(2):
        text_file.make_dangerous()
    assert text_file.is_dangerous is True
def test_force_ext_change_filepath(self, text_file):
    """After force_ext('.test'), dst_path should end in '.test'."""
    text_file.force_ext('.test')
    destination = text_file.dst_path
    assert destination.endswith('.test')
def test_force_ext_add_dot(self, text_file):
    """force_ext('test') should yield a dst_path ending in '.test'."""
    text_file.force_ext('test')
    destination = text_file.dst_path
    assert destination.endswith('.test')
def test_force_ext_change_extension_attr(self, text_file):
    """After force_ext('.thing'), the extension attribute should be '.thing'."""
    new_extension = '.thing'
    text_file.force_ext(new_extension)
    assert text_file.extension == '.thing'
def test_force_ext_no_change(self, text_file):
    """force_ext with the current extension should not double it up."""
    text_file.force_ext('.txt')
    assert text_file.extension == '.txt'
    destination = text_file.dst_path
    assert '.txt.txt' not in destination
def test_safe_copy_calls_copy(self, src_dir_path, dest_dir_path):
    """safe_copy should call shutil.copy once with the source path and
    the destination path."""
    file_path = os.path.join(src_dir_path, 'test.txt')
    dst_path = os.path.join(dest_dir_path, 'test.txt')
    # Create an empty source file on disk.
    with open(file_path, 'w+') as handle:
        handle.write('')
    libmagic_patch = mock.patch('kittengroomer.helpers.magic.from_file',
                                return_value='text/plain')
    with libmagic_patch:
        groomed = FileBase(file_path, dst_path)
    with mock.patch('kittengroomer.helpers.shutil.copy') as mock_copy:
        groomed.safe_copy()
        mock_copy.assert_called_once_with(file_path, dst_path)
def test_safe_copy_makedir_doesnt_exist(self):
"""Calling safe_copy should create intermediate directories in the path
if they don't exist."""
pass
def test_safe_copy_makedir_exists(self):
"""Calling safe_copy when some intermediate directories exist should
result in the creation of the full path and the file."""
pass
def test_create_metadata_file_new(self):
pass
def test_create_metadata_file_already_exists(self):
pass
# The bodiless ``class TestLogger:`` header that preceded this class was not
# valid Python on its own; only the class that owns the test body is kept.
class TestLogging:
    """Tests for the logging helpers (currently a single placeholder)."""

    def test_computehash(self):
        """Computehash should return the correct sha256 hash of a given file."""
        # Placeholder: no checks implemented yet.
        pass
class TestKittenGroomerBase:
    """Tests for KittenGroomerBase construction and its filesystem helpers."""

    @fixture
    def source_directory(self):
        """Relative source directory used by the instantiation tests."""
        # NOTE(review): the project's .gitignore no longer lists
        # tests/src_invalid; confirm this path is still the intended one.
        return 'tests/src_invalid'

    @fixture(scope='class')
    def src_dir_path(self, tmpdir_factory):
        """Path of a temporary 'src' directory shared by the whole class."""
        return tmpdir_factory.mktemp('src').strpath

    @fixture(scope='class')
    def dest_dir_path(self, tmpdir_factory):
        """Path of a temporary 'dest' directory shared by the whole class."""
        return tmpdir_factory.mktemp('dest').strpath

    @fixture
    def dest_directory(self):
        """Relative destination directory used by the instantiation tests."""
        return 'tests/dst'

    # This method is requested as a fixture by several tests below, so it
    # needs the decorator; without it pytest cannot resolve ``groomer``.
    @fixture
    def groomer(self, src_dir_path, dest_dir_path):
        """KittenGroomerBase built on the temporary src/dest directories."""
        return KittenGroomerBase(src_dir_path, dest_dir_path)

    @fixture
    def generic_groomer(self, source_directory, dest_directory):
        """KittenGroomerBase built on the relative test directories."""
        return KittenGroomerBase(source_directory, dest_directory)

    def test_create(self, generic_groomer):
        """The generic_groomer fixture should yield a truthy object."""
        assert generic_groomer

    def test_instantiation(self, source_directory, dest_directory):
        """Constructing a KittenGroomerBase should not raise."""
        KittenGroomerBase(source_directory, dest_directory)

    def test_list_all_files(self, tmpdir):
        """list_all_files on a groomer's own src_root_path should include
        files and leave out directories."""
        # The original header had no body; these statements were stranded
        # inside test_list_all_files_excludes_dir, where ``file`` was undefined.
        file = tmpdir.join('test.txt')
        file.write('testing')
        testdir = tmpdir.join('testdir')
        os.mkdir(testdir.strpath)
        simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath)
        files = simple_groomer.list_all_files(simple_groomer.src_root_path)
        assert file.strpath in files
        assert testdir.strpath not in files

    def test_list_all_files_includes_file(self, tmpdir, groomer):
        """Calling list_all_files should include files in the given path."""
        file = tmpdir.join('test.txt')
        file.write('testing')
        files = groomer.list_all_files(tmpdir.strpath)
        assert file.strpath in files

    def test_list_all_files_excludes_dir(self, tmpdir, groomer):
        """Calling list_all_files shouldn't include directories in the given
        path."""
        testdir = tmpdir.join('testdir')
        os.mkdir(testdir.strpath)
        files = groomer.list_all_files(tmpdir.strpath)
        assert testdir.strpath not in files

    def test_safe_remove(self, groomer, src_dir_path):
        """Calling safe_remove should not raise an Exception if trying to
        remove a file that doesn't exist."""
        groomer.safe_remove(os.path.join(src_dir_path, 'thing'))

    def test_safe_mkdir_file_exists(self, groomer, dest_dir_path):
        """Calling safe_mkdir on an existing directory should not raise."""
        filepath = os.path.join(dest_dir_path, 'thing')
        os.mkdir(filepath)
        groomer.safe_mkdir(filepath)

    def test_processdir_not_implemented(self, groomer):
        """Calling processdir should raise an Implementation Required error."""
        with pytest.raises(ImplementationRequired):
            groomer.processdir('.', '.')

View File

@ -1,12 +0,0 @@
src_invalid
===========
-
src_valid
=========
- Example.jpg: image/jpeg, obtained from wikipedia.org
- blah.conf: text file with a .conf extension