mirror of https://github.com/CIRCL/PyCIRCLean
commit
79b15fd7da
|
@ -68,7 +68,11 @@ target/
|
||||||
|
|
||||||
# Project specific
|
# Project specific
|
||||||
tests/dst/*
|
tests/dst/*
|
||||||
|
tests/*_dst
|
||||||
tests/test_logs/*
|
tests/test_logs/*
|
||||||
!tests/**/.keepdir
|
!tests/**/.keepdir
|
||||||
!tests/src_invalid/*
|
!tests/src_invalid/*
|
||||||
!tests/src_valid/*
|
!tests/src_valid/*
|
||||||
|
pdfid.py
|
||||||
|
# Plugins are pdfid stuff
|
||||||
|
plugin_*
|
||||||
|
|
|
@ -66,8 +66,8 @@ install:
|
||||||
- rm fraunhoferlibrary.zip
|
- rm fraunhoferlibrary.zip
|
||||||
- 7z x -p42 42.zip
|
- 7z x -p42 42.zip
|
||||||
# Some random samples
|
# Some random samples
|
||||||
- wget http://www.sample-videos.com/audio/mp3/india-national-anthem.mp3
|
# - wget http://www.sample-videos.com/audio/mp3/india-national-anthem.mp3
|
||||||
- wget http://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4
|
# - wget http://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4
|
||||||
- wget http://thewalter.net/stef/software/rtfx/sample.rtf
|
- wget http://thewalter.net/stef/software/rtfx/sample.rtf
|
||||||
- popd
|
- popd
|
||||||
|
|
||||||
|
|
111
README.md
111
README.md
|
@ -1,13 +1,12 @@
|
||||||
[Travis CI build status](https://travis-ci.org/CIRCL/PyCIRCLean)
|
[Travis CI build status](https://travis-ci.org/CIRCL/PyCIRCLean)
|
||||||
[Codecov coverage report](https://codecov.io/github/CIRCL/PyCIRCLean?branch=master)
|
[Codecov coverage report](https://codecov.io/github/CIRCL/PyCIRCLean?branch=master)
|
||||||
[Coveralls coverage report](https://coveralls.io/github/Rafiot/PyCIRCLean?branch=master)
|
|
||||||
|
|
||||||
# PyCIRCLean
|
# PyCIRCLean
|
||||||
|
|
||||||
PyCIRCLean is the core Python code used by [CIRCLean](https://github.com/CIRCL/Circlean/), an open-source
|
PyCIRCLean is the core Python code used by [CIRCLean](https://github.com/CIRCL/Circlean/), an open-source
|
||||||
USB key and document sanitizer created by [CIRCL](https://www.circl.lu/). This module has been separated from the
|
USB key and document sanitizer created by [CIRCL](https://www.circl.lu/). This module has been separated from the
|
||||||
device-specific scripts and can be used for dedicated security applications to sanitize documents from hostile environments
|
device-specific scripts and can be used for dedicated security applications to sanitize documents from hostile environments
|
||||||
to trusted environments. PyCIRCLean is currently Python 3.3+ only.
|
to trusted environments. PyCIRCLean is currently Python 3.3+ compatible.
|
||||||
|
|
||||||
# Installation
|
# Installation
|
||||||
|
|
||||||
|
@ -23,10 +22,13 @@ pip install .
|
||||||
|
|
||||||
# How to use PyCIRCLean
|
# How to use PyCIRCLean
|
||||||
|
|
||||||
PyCIRCLean is a simple Python library to handle file checking and sanitization. PyCIRCLean is designed as a simple library
|
PyCIRCLean is a simple Python library to handle file checking and sanitization.
|
||||||
that can be overloaded to cover specific checking and sanitization workflows in different organizations like industrial
|
PyCIRCLean is designed to be extended to cover specific checking
|
||||||
|
and sanitization workflows in different organizations such as industrial
|
||||||
environments or restricted/classified ICT environments. A series of practical examples utilizing PyCIRCLean can be found
|
environments or restricted/classified ICT environments. A series of practical examples utilizing PyCIRCLean can be found
|
||||||
in the [./examples](./examples) directory.
|
in the [./examples](./examples) directory. Note: for commits beyond version 2.2.0 these
|
||||||
|
examples are not guaranteed to work with the PyCIRCLean API. Please check [helpers.py](./kittengroomer/helpers.py) or
|
||||||
|
[filecheck.py](./bin/filecheck.py) to see the new API.
|
||||||
|
|
||||||
The following simple example using PyCIRCLean will only copy files with a .conf extension matching the 'text/plain' MIME
|
The following simple example using PyCIRCLean will only copy files with a .conf extension matching the 'text/plain' MIME
|
||||||
type. If any other file is found in the source directory, the files won't be copied to the destination directory.
|
type. If any other file is found in the source directory, the files won't be copied to the destination directory.
|
||||||
|
@ -41,94 +43,79 @@ from kittengroomer import FileBase, KittenGroomerBase, main
|
||||||
|
|
||||||
|
|
||||||
# Extension
|
# Extension
|
||||||
configfiles = {'.conf': 'text/plain'}
|
class Config:
|
||||||
|
configfiles = {'.conf': 'text/plain'}
|
||||||
|
|
||||||
|
|
||||||
class FileSpec(FileBase):
|
class FileSpec(FileBase):
|
||||||
|
|
||||||
def __init__(self, src_path, dst_path):
|
def __init__(self, src_path, dst_path):
|
||||||
''' Init file object, set the extension '''
|
"""Init file object, set the extension."""
|
||||||
super(FileSpec, self).__init__(src_path, dst_path)
|
super(FileSpec, self).__init__(src_path, dst_path)
|
||||||
|
self.valid_files = {}
|
||||||
a, self.extension = os.path.splitext(self.src_path)
|
a, self.extension = os.path.splitext(self.src_path)
|
||||||
self.mimetype = magic.from_file(self.src_path, mime=True).decode("utf-8")
|
self.mimetype = magic.from_file(self.src_path, mime=True).decode("utf-8")
|
||||||
|
# The initial version will only accept the file extensions/mimetypes listed here.
|
||||||
|
self.valid_files.update(Config.configfiles)
|
||||||
|
|
||||||
|
def check(self):
|
||||||
|
valid = True
|
||||||
|
expected_mime = self.valid_files.get(self.extension)
|
||||||
|
if expected_mime is None:
|
||||||
|
# Unexpected extension => disallowed
|
||||||
|
valid = False
|
||||||
|
compare_ext = 'Extension: {} - Expected: {}'.format(self.cur_file.extension, ', '.join(self.valid_files.keys()))
|
||||||
|
elif self.mimetype != expected_mime:
|
||||||
|
# Unexpected mimetype => disallowed
|
||||||
|
valid = False
|
||||||
|
compare_mime = 'Mime: {} - Expected: {}'.format(self.cur_file.mimetype, expected_mime)
|
||||||
|
self.add_log_details('valid', valid)
|
||||||
|
if valid:
|
||||||
|
self.cur_file.log_string = 'Extension: {} - MimeType: {}'.format(self.cur_file.extension, self.cur_file.mimetype)
|
||||||
|
else:
|
||||||
|
self.should_copy = False
|
||||||
|
if compare_ext is not None:
|
||||||
|
self.add_log_string(compare_ext)
|
||||||
|
else:
|
||||||
|
self.add_log_string(compare_mime)
|
||||||
|
if self.should_copy:
|
||||||
|
self.safe_copy()
|
||||||
|
self.write_log()
|
||||||
|
|
||||||
|
|
||||||
class KittenGroomerSpec(KittenGroomerBase):
|
class KittenGroomerSpec(KittenGroomerBase):
|
||||||
|
|
||||||
def __init__(self, root_src=None, root_dst=None):
|
def __init__(self, root_src=None, root_dst=None):
|
||||||
'''
|
"""Initialize the basics of the copy."""
|
||||||
Initialize the basics of the copy
|
|
||||||
'''
|
|
||||||
if root_src is None:
|
if root_src is None:
|
||||||
root_src = os.path.join(os.sep, 'media', 'src')
|
root_src = os.path.join(os.sep, 'media', 'src')
|
||||||
if root_dst is None:
|
if root_dst is None:
|
||||||
root_dst = os.path.join(os.sep, 'media', 'dst')
|
root_dst = os.path.join(os.sep, 'media', 'dst')
|
||||||
super(KittenGroomerSpec, self).__init__(root_src, root_dst)
|
super(KittenGroomerSpec, self).__init__(root_src, root_dst)
|
||||||
self.valid_files = {}
|
|
||||||
|
|
||||||
# The initial version will only accept the file extensions/mimetypes listed here.
|
|
||||||
self.valid_files.update(configfiles)
|
|
||||||
|
|
||||||
def _print_log(self):
|
|
||||||
'''
|
|
||||||
Print the logs related to the current file being processed
|
|
||||||
'''
|
|
||||||
tmp_log = self.log_name.fields(**self.cur_file.log_details)
|
|
||||||
if not self.cur_file.log_details.get('valid'):
|
|
||||||
tmp_log.warning(self.cur_file.log_string)
|
|
||||||
else:
|
|
||||||
tmp_log.debug(self.cur_file.log_string)
|
|
||||||
|
|
||||||
def processdir(self):
|
def processdir(self):
|
||||||
'''
|
"""Main function doing the processing."""
|
||||||
Main function doing the processing
|
|
||||||
'''
|
|
||||||
to_copy = []
|
to_copy = []
|
||||||
error = []
|
error = []
|
||||||
for srcpath in self._list_all_files(self.src_root_dir):
|
for srcpath in self._list_all_files(self.src_root_dir):
|
||||||
valid = True
|
dstpath = srcpath.replace(self.src_root_dir, self.dst_root_dir)
|
||||||
self.log_name.info('Processing {}', srcpath.replace(self.src_root_dir + '/', ''))
|
cur_file = FileSpec(srcpath, dstpath)
|
||||||
self.cur_file = FileSpec(srcpath, srcpath.replace(self.src_root_dir, self.dst_root_dir))
|
cur_file.check()
|
||||||
expected_mime = self.valid_files.get(self.cur_file.extension)
|
|
||||||
if expected_mime is None:
|
|
||||||
# Unexpected extension => disallowed
|
|
||||||
valid = False
|
|
||||||
compare_ext = 'Extension: {} - Expected: {}'.format(self.cur_file.extension, ', '.join(self.valid_files.keys()))
|
|
||||||
elif self.cur_file.mimetype != expected_mime:
|
|
||||||
# Unexpected mimetype => disallowed
|
|
||||||
valid = False
|
|
||||||
compare_mime = 'Mime: {} - Expected: {}'.format(self.cur_file.mimetype, expected_mime)
|
|
||||||
self.cur_file.add_log_details('valid', valid)
|
|
||||||
if valid:
|
|
||||||
to_copy.append(self.cur_file)
|
|
||||||
self.cur_file.log_string = 'Extension: {} - MimeType: {}'.format(self.cur_file.extension, self.cur_file.mimetype)
|
|
||||||
else:
|
|
||||||
error.append(self.cur_file)
|
|
||||||
if compare_ext is not None:
|
|
||||||
self.cur_file.log_string = compare_ext
|
|
||||||
else:
|
|
||||||
self.cur_file.log_string = compare_mime
|
|
||||||
if len(error) > 0:
|
|
||||||
for f in error + to_copy:
|
|
||||||
self.cur_file = f
|
|
||||||
self._print_log()
|
|
||||||
else:
|
|
||||||
for f in to_copy:
|
|
||||||
self.cur_file = f
|
|
||||||
self._safe_copy()
|
|
||||||
self._print_log()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(KittenGroomerSpec, ' Only copy some files, returns an error is anything else is found')
|
main(KittenGroomerSpec, ' Only copy some files, returns an error is anything else is found')
|
||||||
exit(0)
|
|
||||||
~~~
|
~~~
|
||||||
|
|
||||||
# How to contribute
|
# How to contribute
|
||||||
|
|
||||||
We welcome contributions (including bug fixes, new code workflows) via pull requests. We are interested in any new workflows
|
We welcome contributions (including bug fixes, new example file processing
|
||||||
that can be used to improve security in different organizations. If you see any potential enhancements required to support
|
workflows) via pull requests. We are particularly interested in any new workflows
|
||||||
your sanitization workflow, please feel free to open an issue. Read [CONTRIBUTING.md](/CONTRIBUTING.md) for more information.
|
that can be used to improve security in different organizations. If you see any
|
||||||
|
potential enhancements required to support your sanitization workflow, please feel
|
||||||
|
free to open an issue. Read [CONTRIBUTING.md](/CONTRIBUTING.md) for more
|
||||||
|
information.
|
||||||
|
|
||||||
|
|
||||||
# License
|
# License
|
||||||
|
|
982
bin/filecheck.py
982
bin/filecheck.py
File diff suppressed because it is too large
Load Diff
|
@ -339,7 +339,7 @@ class KittenGroomer(KittenGroomerBase):
|
||||||
archbomb_path = src_dir[:-len('_temp')]
|
archbomb_path = src_dir[:-len('_temp')]
|
||||||
self._safe_remove(archbomb_path)
|
self._safe_remove(archbomb_path)
|
||||||
|
|
||||||
for srcpath in self._list_all_files(src_dir):
|
for srcpath in self.list_all_files(src_dir):
|
||||||
self.cur_file = File(srcpath, srcpath.replace(src_dir, dst_dir))
|
self.cur_file = File(srcpath, srcpath.replace(src_dir, dst_dir))
|
||||||
|
|
||||||
self.log_name.info('Processing {} ({}/{})', srcpath.replace(src_dir + '/', ''),
|
self.log_name.info('Processing {} ({}/{})', srcpath.replace(src_dir + '/', ''),
|
||||||
|
|
|
@ -54,7 +54,7 @@ class KittenGroomerPier9(KittenGroomerBase):
|
||||||
'''
|
'''
|
||||||
Main function doing the processing
|
Main function doing the processing
|
||||||
'''
|
'''
|
||||||
for srcpath in self._list_all_files(self.src_root_dir):
|
for srcpath in self.list_all_files(self.src_root_dir):
|
||||||
self.log_name.info('Processing {}', srcpath.replace(self.src_root_dir + '/', ''))
|
self.log_name.info('Processing {}', srcpath.replace(self.src_root_dir + '/', ''))
|
||||||
self.cur_file = FilePier9(srcpath, srcpath.replace(self.src_root_dir, self.dst_root_dir))
|
self.cur_file = FilePier9(srcpath, srcpath.replace(self.src_root_dir, self.dst_root_dir))
|
||||||
if not self.cur_file.is_dangerous() and self.cur_file.extension in self.authorized_extensions:
|
if not self.cur_file.is_dangerous() and self.cur_file.extension in self.authorized_extensions:
|
||||||
|
|
|
@ -54,7 +54,7 @@ class KittenGroomerSpec(KittenGroomerBase):
|
||||||
'''
|
'''
|
||||||
to_copy = []
|
to_copy = []
|
||||||
error = []
|
error = []
|
||||||
for srcpath in self._list_all_files(self.src_root_dir):
|
for srcpath in self.list_all_files(self.src_root_dir):
|
||||||
valid = True
|
valid = True
|
||||||
self.log_name.info('Processing {}', srcpath.replace(self.src_root_dir + '/', ''))
|
self.log_name.info('Processing {}', srcpath.replace(self.src_root_dir + '/', ''))
|
||||||
self.cur_file = FileSpec(srcpath, srcpath.replace(self.src_root_dir, self.dst_root_dir))
|
self.cur_file = FileSpec(srcpath, srcpath.replace(self.src_root_dir, self.dst_root_dir))
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from .helpers import FileBase, KittenGroomerBase, main
|
from .helpers import FileBase, KittenGroomerBase, GroomerLogger, main
|
||||||
|
|
|
@ -9,13 +9,12 @@ desired behavior.
|
||||||
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import shutil
|
import shutil
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
import magic
|
import magic
|
||||||
from twiggy import quick_setup, log
|
import twiggy
|
||||||
|
|
||||||
|
|
||||||
class KittenGroomerError(Exception):
|
class KittenGroomerError(Exception):
|
||||||
|
@ -28,197 +27,268 @@ class KittenGroomerError(Exception):
|
||||||
|
|
||||||
class ImplementationRequired(KittenGroomerError):
|
class ImplementationRequired(KittenGroomerError):
|
||||||
"""Implementation required error."""
|
"""Implementation required error."""
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class FileBase(object):
|
class FileBase(object):
|
||||||
"""
|
"""
|
||||||
Base object for individual files in the source directory. Contains file
|
Base object for individual files in the source directory.
|
||||||
attributes and various helper methods. Subclass and add attributes
|
|
||||||
or methods relevant to a given implementation.
|
Contains file attributes and various helper methods.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, src_path, dst_path):
|
def __init__(self, src_path, dst_path, logger=None):
|
||||||
"""Initialized with the source path and expected destination path."""
|
"""
|
||||||
|
Initialized with the source path and expected destination path.
|
||||||
|
|
||||||
|
self.logger should be a logging object with an add_file method.
|
||||||
|
Create various properties and determine the file's mimetype.
|
||||||
|
"""
|
||||||
self.src_path = src_path
|
self.src_path = src_path
|
||||||
self.dst_path = dst_path
|
self.dst_path = dst_path
|
||||||
self.log_details = {'filepath': self.src_path}
|
self.filename = os.path.basename(self.src_path)
|
||||||
self.log_string = ''
|
self.logger = logger
|
||||||
self._determine_extension()
|
self._file_props = {
|
||||||
self._determine_mimetype()
|
'filepath': self.src_path,
|
||||||
|
'filename': self.filename,
|
||||||
|
'file_size': self.size,
|
||||||
|
'maintype': None,
|
||||||
|
'subtype': None,
|
||||||
|
'extension': None,
|
||||||
|
'safety_category': None,
|
||||||
|
'symlink': False,
|
||||||
|
'copied': False,
|
||||||
|
'file_string_set': set(),
|
||||||
|
'errors': {},
|
||||||
|
'user_defined': {}
|
||||||
|
}
|
||||||
|
self.extension = self._determine_extension()
|
||||||
|
self.set_property('extension', self.extension)
|
||||||
|
self.mimetype = self._determine_mimetype()
|
||||||
|
self.should_copy = True
|
||||||
|
self.main_type = None
|
||||||
|
self.sub_type = None
|
||||||
|
if self.mimetype:
|
||||||
|
self.main_type, self.sub_type = self._split_subtypes(self.mimetype)
|
||||||
|
if self.main_type:
|
||||||
|
self.set_property('maintype', self.main_type)
|
||||||
|
if self.sub_type:
|
||||||
|
self.set_property('subtype', self.sub_type)
|
||||||
|
|
||||||
def _determine_extension(self):
|
def _determine_extension(self):
|
||||||
_, ext = os.path.splitext(self.src_path)
|
_, ext = os.path.splitext(self.src_path)
|
||||||
self.extension = ext.lower()
|
ext = ext.lower()
|
||||||
|
if ext == '':
|
||||||
|
ext = None
|
||||||
|
return ext
|
||||||
|
|
||||||
def _determine_mimetype(self):
|
def _determine_mimetype(self):
|
||||||
if os.path.islink(self.src_path):
|
if os.path.islink(self.src_path):
|
||||||
# magic will throw an IOError on a broken symlink
|
# magic will throw an IOError on a broken symlink
|
||||||
self.mimetype = 'inode/symlink'
|
mimetype = 'inode/symlink'
|
||||||
|
self.set_property('symlink', os.readlink(self.src_path))
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
mt = magic.from_file(self.src_path, mime=True)
|
mt = magic.from_file(self.src_path, mime=True)
|
||||||
# magic will always return something, even if it's just 'data'
|
# Note: magic will always return something, even if it's just 'data'
|
||||||
except UnicodeEncodeError as e:
|
except UnicodeEncodeError as e:
|
||||||
# FIXME: The encoding of the file is broken (possibly UTF-16)
|
# FIXME: The encoding of the file is broken (possibly UTF-16)
|
||||||
mt = ''
|
# Note: one of the Travis files will trigger this exception
|
||||||
self.log_details.update({'UnicodeError': e})
|
self.add_error(e, '')
|
||||||
|
mt = None
|
||||||
try:
|
try:
|
||||||
self.mimetype = mt.decode("utf-8")
|
mimetype = mt.decode("utf-8")
|
||||||
except:
|
except:
|
||||||
self.mimetype = mt
|
mimetype = mt
|
||||||
if self.mimetype and '/' in self.mimetype:
|
return mimetype
|
||||||
self.main_type, self.sub_type = self.mimetype.split('/')
|
|
||||||
|
def _split_subtypes(self, mimetype):
|
||||||
|
if '/' in mimetype:
|
||||||
|
main_type, sub_type = mimetype.split('/')
|
||||||
else:
|
else:
|
||||||
self.main_type = ''
|
main_type, sub_type = None, None
|
||||||
self.sub_type = ''
|
return main_type, sub_type
|
||||||
|
|
||||||
|
@property
|
||||||
|
def size(self):
|
||||||
|
"""Filesize in bytes as an int, 0 if file does not exist."""
|
||||||
|
try:
|
||||||
|
size = os.path.getsize(self.src_path)
|
||||||
|
except FileNotFoundError:
|
||||||
|
size = 0
|
||||||
|
return size
|
||||||
|
|
||||||
|
@property
|
||||||
def has_mimetype(self):
|
def has_mimetype(self):
|
||||||
"""
|
"""True if file has a main and sub mimetype, else False."""
|
||||||
Returns True if file has a full mimetype, else False.
|
# TODO: broken mimetype checks should be done somewhere else.
|
||||||
|
# Should the check be by default or should we let the API consumer write it?
|
||||||
Returns False + updates log if self.main_type or self.sub_type
|
|
||||||
are not set.
|
|
||||||
"""
|
|
||||||
if not self.main_type or not self.sub_type:
|
if not self.main_type or not self.sub_type:
|
||||||
self.log_details.update({'broken_mime': True})
|
|
||||||
return False
|
return False
|
||||||
|
else:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@property
|
||||||
def has_extension(self):
|
def has_extension(self):
|
||||||
"""
|
"""True if self.extension is set, else False."""
|
||||||
Returns True if self.extension is set, else False.
|
if self.extension is None:
|
||||||
|
|
||||||
Returns False + updates self.log_details if self.extension is not set.
|
|
||||||
"""
|
|
||||||
if self.extension == '':
|
|
||||||
self.log_details.update({'no_extension': True})
|
|
||||||
return False
|
return False
|
||||||
|
else:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@property
|
||||||
def is_dangerous(self):
|
def is_dangerous(self):
|
||||||
"""Returns True if self.log_details contains 'dangerous'."""
|
"""True if file has been marked 'dangerous', else False."""
|
||||||
return ('dangerous' in self.log_details)
|
return self._file_props['safety_category'] is 'dangerous'
|
||||||
|
|
||||||
|
@property
|
||||||
def is_unknown(self):
|
def is_unknown(self):
|
||||||
"""Returns True if self.log_details contains 'unknown'."""
|
"""True if file has been marked 'unknown', else False."""
|
||||||
return ('unknown' in self.log_details)
|
return self._file_props['safety_category'] is 'unknown'
|
||||||
|
|
||||||
|
@property
|
||||||
def is_binary(self):
|
def is_binary(self):
|
||||||
"""returns True if self.log_details contains 'binary'."""
|
"""True if file has been marked 'binary', else False."""
|
||||||
return ('binary' in self.log_details)
|
return self._file_props['safety_category'] is 'binary'
|
||||||
|
|
||||||
|
@property
|
||||||
def is_symlink(self):
|
def is_symlink(self):
|
||||||
"""Returns True and updates log if file is a symlink."""
|
"""True if file is a symlink, else False."""
|
||||||
if self.has_mimetype() and self.main_type == 'inode' and self.sub_type == 'symlink':
|
if self._file_props['symlink'] is False:
|
||||||
self.log_details.update({'symlink': os.readlink(self.src_path)})
|
|
||||||
return True
|
|
||||||
return False
|
return False
|
||||||
|
else:
|
||||||
|
return True
|
||||||
|
|
||||||
def add_log_details(self, key, value):
|
def set_property(self, prop_string, value):
|
||||||
"""Takes a key + a value and adds them to self.log_details."""
|
|
||||||
self.log_details[key] = value
|
|
||||||
|
|
||||||
def make_dangerous(self):
|
|
||||||
"""
|
"""
|
||||||
Marks a file as dangerous.
|
Take a property and a value and add them to self._file_props.
|
||||||
|
|
||||||
Prepends and appends DANGEROUS to the destination file name
|
If prop_string is already in _file_props, set prop_string to value.
|
||||||
|
If prop_string not in _file_props, set prop_string to value in
|
||||||
|
_file_props['user_defined'].
|
||||||
|
"""
|
||||||
|
if prop_string in self._file_props.keys():
|
||||||
|
self._file_props[prop_string] = value
|
||||||
|
else:
|
||||||
|
self._file_props['user_defined'][prop_string] = value
|
||||||
|
|
||||||
|
def get_property(self, file_prop):
|
||||||
|
"""Get the value for a property in _file_props."""
|
||||||
|
# TODO: could probably be refactored
|
||||||
|
if file_prop in self._file_props:
|
||||||
|
return self._file_props[file_prop]
|
||||||
|
elif file_prop in self._file_props['user_defined']:
|
||||||
|
return self._file_props['user_defined'][file_prop]
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def add_error(self, error, info):
|
||||||
|
"""Add an error: info pair to _file_props['errors']."""
|
||||||
|
self._file_props['errors'].update({error: info})
|
||||||
|
|
||||||
|
def add_file_string(self, file_string):
|
||||||
|
"""Add a file descriptor string to _file_props."""
|
||||||
|
self._file_props['file_string_set'].add(file_string)
|
||||||
|
|
||||||
|
def make_dangerous(self, reason_string=None):
|
||||||
|
"""
|
||||||
|
Mark file as dangerous.
|
||||||
|
|
||||||
|
Prepend and append DANGEROUS to the destination file name
|
||||||
to help prevent double-click of death.
|
to help prevent double-click of death.
|
||||||
"""
|
"""
|
||||||
if self.is_dangerous():
|
if self.is_dangerous:
|
||||||
return
|
return
|
||||||
self.log_details['dangerous'] = True
|
self.set_property('safety_category', 'dangerous')
|
||||||
|
# LOG: store reason string somewhere and do something with it
|
||||||
path, filename = os.path.split(self.dst_path)
|
path, filename = os.path.split(self.dst_path)
|
||||||
self.dst_path = os.path.join(path, 'DANGEROUS_{}_DANGEROUS'.format(filename))
|
self.dst_path = os.path.join(path, 'DANGEROUS_{}_DANGEROUS'.format(filename))
|
||||||
|
|
||||||
def make_unknown(self):
|
def make_unknown(self):
|
||||||
"""Marks a file as an unknown type and prepends UNKNOWN to filename."""
|
"""Mark file as an unknown type and prepend UNKNOWN to filename."""
|
||||||
if self.is_dangerous() or self.is_binary():
|
if self.is_dangerous or self.is_binary:
|
||||||
return
|
return
|
||||||
self.log_details['unknown'] = True
|
self.set_property('safety_category', 'unknown')
|
||||||
path, filename = os.path.split(self.dst_path)
|
path, filename = os.path.split(self.dst_path)
|
||||||
self.dst_path = os.path.join(path, 'UNKNOWN_{}'.format(filename))
|
self.dst_path = os.path.join(path, 'UNKNOWN_{}'.format(filename))
|
||||||
|
|
||||||
def make_binary(self):
|
def make_binary(self):
|
||||||
"""Marks a file as a binary and appends .bin to filename."""
|
"""Mark file as a binary and append .bin to filename."""
|
||||||
if self.is_dangerous():
|
if self.is_dangerous:
|
||||||
return
|
return
|
||||||
self.log_details['binary'] = True
|
self.set_property('safety_category', 'binary')
|
||||||
path, filename = os.path.split(self.dst_path)
|
path, filename = os.path.split(self.dst_path)
|
||||||
self.dst_path = os.path.join(path, '{}.bin'.format(filename))
|
self.dst_path = os.path.join(path, '{}.bin'.format(filename))
|
||||||
|
|
||||||
|
def safe_copy(self, src=None, dst=None):
|
||||||
|
"""Copy file and create destination directories if needed."""
|
||||||
|
if src is None:
|
||||||
|
src = self.src_path
|
||||||
|
if dst is None:
|
||||||
|
dst = self.dst_path
|
||||||
|
try:
|
||||||
|
dst_path, filename = os.path.split(dst)
|
||||||
|
if not os.path.exists(dst_path):
|
||||||
|
os.makedirs(dst_path)
|
||||||
|
shutil.copy(src, dst)
|
||||||
|
except Exception as e:
|
||||||
|
self.add_error(e, '')
|
||||||
|
|
||||||
def force_ext(self, ext):
|
def force_ext(self, ext):
|
||||||
"""If dst_path does not end in ext, appends the ext and updates log."""
|
"""If dst_path does not end in ext, change it and edit _file_props."""
|
||||||
if not self.dst_path.endswith(ext):
|
if not self.dst_path.endswith(ext):
|
||||||
self.log_details['force_ext'] = True
|
self.set_property('force_ext', True)
|
||||||
self.dst_path += ext
|
self.dst_path += ext
|
||||||
|
if not self._file_props['extension'] == ext:
|
||||||
|
self.set_property('extension', ext)
|
||||||
|
|
||||||
|
def create_metadata_file(self, ext):
|
||||||
|
"""Create a separate file to hold metadata from this file."""
|
||||||
|
try:
|
||||||
|
# make sure we aren't overwriting anything
|
||||||
|
if os.path.exists(self.src_path + ext):
|
||||||
|
raise KittenGroomerError("Cannot create split metadata file for \"" +
|
||||||
|
self.dst_path + "\", type '" +
|
||||||
|
ext + "': File exists.")
|
||||||
|
else:
|
||||||
|
dst_dir_path, filename = os.path.split(self.dst_path)
|
||||||
|
if not os.path.exists(dst_dir_path):
|
||||||
|
os.makedirs(dst_dir_path)
|
||||||
|
# TODO: Check extension for leading "."
|
||||||
|
self.metadata_file_path = self.dst_path + ext
|
||||||
|
return self.metadata_file_path
|
||||||
|
except KittenGroomerError as e:
|
||||||
|
self.add_error(e, '')
|
||||||
|
return False
|
||||||
|
|
||||||
|
def write_log(self):
|
||||||
|
"""Write logs from file to self.logger."""
|
||||||
|
file_log = self.logger.add_file(self)
|
||||||
|
file_log.fields(**self._file_props)
|
||||||
|
|
||||||
|
|
||||||
class KittenGroomerBase(object):
|
class GroomerLogger(object):
|
||||||
"""Base object responsible for copy/sanitization process."""
|
"""Groomer logging interface."""
|
||||||
|
|
||||||
def __init__(self, root_src, root_dst, debug=False):
|
def __init__(self, root_dir_path, debug=False):
|
||||||
"""Initialized with path to source and dest directories."""
|
self.root_dir = root_dir_path
|
||||||
self.src_root_dir = root_src
|
self.log_dir_path = os.path.join(root_dir_path, 'logs')
|
||||||
self.dst_root_dir = root_dst
|
if os.path.exists(self.log_dir_path):
|
||||||
self.log_root_dir = os.path.join(self.dst_root_dir, 'logs')
|
shutil.rmtree(self.log_dir_path)
|
||||||
self._safe_rmtree(self.log_root_dir)
|
os.makedirs(self.log_dir_path)
|
||||||
self._safe_mkdir(self.log_root_dir)
|
self.log_processing = os.path.join(self.log_dir_path, 'processing.log')
|
||||||
self.log_processing = os.path.join(self.log_root_dir, 'processing.log')
|
self.log_content = os.path.join(self.log_dir_path, 'content.log')
|
||||||
self.log_content = os.path.join(self.log_root_dir, 'content.log')
|
twiggy.quick_setup(file=self.log_processing)
|
||||||
self.tree(self.src_root_dir)
|
self.log = twiggy.log.name('files')
|
||||||
|
if debug:
|
||||||
quick_setup(file=self.log_processing)
|
self.log_debug_err = os.path.join(self.log_dir_path, 'debug_stderr.log')
|
||||||
self.log_name = log.name('files')
|
self.log_debug_out = os.path.join(self.log_dir_path, 'debug_stdout.log')
|
||||||
self.resources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
|
|
||||||
os.environ["PATH"] += os.pathsep + self.resources_path
|
|
||||||
|
|
||||||
self.cur_file = None
|
|
||||||
|
|
||||||
self.debug = debug
|
|
||||||
if self.debug:
|
|
||||||
self.log_debug_err = os.path.join(self.log_root_dir, 'debug_stderr.log')
|
|
||||||
self.log_debug_out = os.path.join(self.log_root_dir, 'debug_stdout.log')
|
|
||||||
else:
|
else:
|
||||||
self.log_debug_err = os.devnull
|
self.log_debug_err = os.devnull
|
||||||
self.log_debug_out = os.devnull
|
self.log_debug_out = os.devnull
|
||||||
|
|
||||||
def _computehash(self, path):
|
|
||||||
"""Returns a sha256 hash of a file at a given path."""
|
|
||||||
s = hashlib.sha256()
|
|
||||||
with open(path, 'rb') as f:
|
|
||||||
while True:
|
|
||||||
buf = f.read(0x100000)
|
|
||||||
if not buf:
|
|
||||||
break
|
|
||||||
s.update(buf)
|
|
||||||
return s.hexdigest()
|
|
||||||
|
|
||||||
def tree(self, base_dir, padding=' '):
|
def tree(self, base_dir, padding=' '):
|
||||||
"""Writes a graphical tree to the log for a given directory."""
|
"""Write a graphical tree to the log for `base_dir`."""
|
||||||
if sys.version_info.major == 2:
|
|
||||||
self.__tree_py2(base_dir, padding)
|
|
||||||
else:
|
|
||||||
self.__tree_py3(base_dir, padding)
|
|
||||||
|
|
||||||
def __tree_py2(self, base_dir, padding=' '):
|
|
||||||
with open(self.log_content, 'ab') as lf:
|
|
||||||
lf.write('#' * 80 + '\n')
|
|
||||||
lf.write('{}+- {}/\n'.format(padding, os.path.basename(os.path.abspath(base_dir))))
|
|
||||||
padding += '| '
|
|
||||||
files = sorted(os.listdir(base_dir))
|
|
||||||
for f in files:
|
|
||||||
curpath = os.path.join(base_dir, f)
|
|
||||||
if os.path.islink(curpath):
|
|
||||||
lf.write('{}+-- {}\t- Symbolic link to {}\n'.format(padding, f, os.readlink(curpath)))
|
|
||||||
elif os.path.isdir(curpath):
|
|
||||||
self.tree(curpath, padding)
|
|
||||||
elif os.path.isfile(curpath):
|
|
||||||
lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)))
|
|
||||||
|
|
||||||
def __tree_py3(self, base_dir, padding=' '):
|
|
||||||
with open(self.log_content, 'ab') as lf:
|
with open(self.log_content, 'ab') as lf:
|
||||||
lf.write(bytes('#' * 80 + '\n', 'UTF-8'))
|
lf.write(bytes('#' * 80 + '\n', 'UTF-8'))
|
||||||
lf.write(bytes('{}+- {}/\n'.format(padding, os.path.basename(os.path.abspath(base_dir)).encode()), 'utf8'))
|
lf.write(bytes('{}+- {}/\n'.format(padding, os.path.basename(os.path.abspath(base_dir)).encode()), 'utf8'))
|
||||||
|
@ -233,80 +303,64 @@ class KittenGroomerBase(object):
|
||||||
elif os.path.isfile(curpath):
|
elif os.path.isfile(curpath):
|
||||||
lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)).encode(errors='ignore'))
|
lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)).encode(errors='ignore'))
|
||||||
|
|
||||||
# ##### Helpers #####
|
def _computehash(self, path):
|
||||||
def _safe_rmtree(self, directory):
|
"""Return a sha256 hash of a file at a given path."""
|
||||||
|
s = hashlib.sha256()
|
||||||
|
with open(path, 'rb') as f:
|
||||||
|
while True:
|
||||||
|
buf = f.read(0x100000)
|
||||||
|
if not buf:
|
||||||
|
break
|
||||||
|
s.update(buf)
|
||||||
|
return s.hexdigest()
|
||||||
|
|
||||||
|
def add_file(self, file):
|
||||||
|
"""Add a file to the log."""
|
||||||
|
return self.log.name('file.src_path')
|
||||||
|
|
||||||
|
|
||||||
|
class KittenGroomerBase(object):
|
||||||
|
"""Base object responsible for copy/sanitization process."""
|
||||||
|
|
||||||
|
def __init__(self, root_src, root_dst, debug=False):
|
||||||
|
"""Initialized with path to source and dest directories."""
|
||||||
|
self.src_root_dir = root_src
|
||||||
|
self.dst_root_dir = root_dst
|
||||||
|
self.debug = debug
|
||||||
|
self.cur_file = None
|
||||||
|
self.logger = GroomerLogger(self.dst_root_dir, debug)
|
||||||
|
|
||||||
|
def safe_rmtree(self, directory):
|
||||||
"""Remove a directory tree if it exists."""
|
"""Remove a directory tree if it exists."""
|
||||||
if os.path.exists(directory):
|
if os.path.exists(directory):
|
||||||
shutil.rmtree(directory)
|
shutil.rmtree(directory)
|
||||||
|
|
||||||
def _safe_remove(self, filepath):
|
def safe_remove(self, filepath):
|
||||||
"""Remove a file if it exists."""
|
"""Remove a file if it exists."""
|
||||||
if os.path.exists(filepath):
|
if os.path.exists(filepath):
|
||||||
os.remove(filepath)
|
os.remove(filepath)
|
||||||
|
|
||||||
def _safe_mkdir(self, directory):
|
def safe_mkdir(self, directory):
|
||||||
"""Make a directory if it does not exist."""
|
"""Make a directory if it does not exist."""
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
|
|
||||||
def _safe_copy(self, src=None, dst=None):
|
def list_all_files(self, directory):
|
||||||
"""Copy a file and create directory if needed."""
|
"""Generator yielding path to all of the files in a directory tree."""
|
||||||
if src is None:
|
|
||||||
src = self.cur_file.src_path
|
|
||||||
if dst is None:
|
|
||||||
dst = self.cur_file.dst_path
|
|
||||||
try:
|
|
||||||
dst_path, filename = os.path.split(dst)
|
|
||||||
self._safe_mkdir(dst_path)
|
|
||||||
shutil.copy(src, dst)
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
|
||||||
# TODO: Logfile
|
|
||||||
print(e)
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _safe_metadata_split(self, ext):
|
|
||||||
"""Create a separate file to hold this file's metadata."""
|
|
||||||
# TODO: fix logic in this method
|
|
||||||
dst = self.cur_file.dst_path
|
|
||||||
try:
|
|
||||||
if os.path.exists(self.cur_file.src_path + ext): # should we check dst_path as well?
|
|
||||||
raise KittenGroomerError("Cannot create split metadata file for \"" +
|
|
||||||
self.cur_file.dst_path + "\", type '" +
|
|
||||||
ext + "': File exists.")
|
|
||||||
dst_path, filename = os.path.split(dst)
|
|
||||||
self._safe_mkdir(dst_path)
|
|
||||||
return open(dst + ext, 'w+')
|
|
||||||
except Exception as e:
|
|
||||||
# TODO: Logfile
|
|
||||||
print(e)
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _list_all_files(self, directory):
|
|
||||||
"""Generate an iterator over all the files in a directory tree."""
|
|
||||||
for root, dirs, files in os.walk(directory):
|
for root, dirs, files in os.walk(directory):
|
||||||
for filename in files:
|
for filename in files:
|
||||||
filepath = os.path.join(root, filename)
|
filepath = os.path.join(root, filename)
|
||||||
yield filepath
|
yield filepath
|
||||||
|
|
||||||
def _print_log(self):
|
|
||||||
"""
|
|
||||||
Print log, should be called after each file.
|
|
||||||
|
|
||||||
You probably want to reimplement it in the subclass.
|
|
||||||
"""
|
|
||||||
tmp_log = self.log_name.fields(**self.cur_file.log_details)
|
|
||||||
tmp_log.info('It did a thing.')
|
|
||||||
|
|
||||||
#######################
|
#######################
|
||||||
|
|
||||||
def processdir(self, src_dir=None, dst_dir=None):
|
# TODO: feels like this function doesn't need to exist if we move main()
|
||||||
"""
|
def processdir(self, src_dir, dst_dir):
|
||||||
Implement this function in your subclass to define file processing behavior.
|
"""Implement this function to define file processing behavior."""
|
||||||
"""
|
|
||||||
raise ImplementationRequired('Please implement processdir.')
|
raise ImplementationRequired('Please implement processdir.')
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Maybe this shouldn't exist? It should probably get moved to filecheck since this isn't really API code
|
||||||
def main(kg_implementation, description='Call a KittenGroomer implementation to process files present in the source directory and copy them to the destination directory.'):
|
def main(kg_implementation, description='Call a KittenGroomer implementation to process files present in the source directory and copy them to the destination directory.'):
|
||||||
parser = argparse.ArgumentParser(prog='KittenGroomer', description=description)
|
parser = argparse.ArgumentParser(prog='KittenGroomer', description=description)
|
||||||
parser.add_argument('-s', '--source', type=str, help='Source directory')
|
parser.add_argument('-s', '--source', type=str, help='Source directory')
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -4,7 +4,7 @@ from setuptools import setup
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='kittengroomer',
|
name='kittengroomer',
|
||||||
version='2.1',
|
version='2.1.0',
|
||||||
author='Raphaël Vinot',
|
author='Raphaël Vinot',
|
||||||
author_email='raphael.vinot@circl.lu',
|
author_email='raphael.vinot@circl.lu',
|
||||||
maintainer='Raphaël Vinot',
|
maintainer='Raphaël Vinot',
|
||||||
|
|
|
@ -6,17 +6,17 @@ def save_logs(groomer, test_description):
|
||||||
test_log_path = 'tests/test_logs/{}.log'.format(test_description)
|
test_log_path = 'tests/test_logs/{}.log'.format(test_description)
|
||||||
with open(test_log_path, 'w+') as test_log:
|
with open(test_log_path, 'w+') as test_log:
|
||||||
test_log.write(divider.format('TEST LOG'))
|
test_log.write(divider.format('TEST LOG'))
|
||||||
with open(groomer.log_processing, 'r') as logfile:
|
with open(groomer.logger.log_processing, 'r') as logfile:
|
||||||
log = logfile.read()
|
log = logfile.read()
|
||||||
test_log.write(log)
|
test_log.write(log)
|
||||||
if groomer.debug:
|
if groomer.debug:
|
||||||
if os.path.exists(groomer.log_debug_err):
|
if os.path.exists(groomer.logger.log_debug_err):
|
||||||
test_log.write(divider.format('ERR LOG'))
|
test_log.write(divider.format('ERR LOG'))
|
||||||
with open(groomer.log_debug_err, 'r') as debug_err:
|
with open(groomer.logger.log_debug_err, 'r') as debug_err:
|
||||||
err = debug_err.read()
|
err = debug_err.read()
|
||||||
test_log.write(err)
|
test_log.write(err)
|
||||||
if os.path.exists(groomer.log_debug_out):
|
if os.path.exists(groomer.logger.log_debug_out):
|
||||||
test_log.write(divider.format('OUT LOG'))
|
test_log.write(divider.format('OUT LOG'))
|
||||||
with open(groomer.log_debug_out, 'r') as debug_out:
|
with open(groomer.logger.log_debug_out, 'r') as debug_out:
|
||||||
out = debug_out.read()
|
out = debug_out.read()
|
||||||
test_log.write(out)
|
test_log.write(out)
|
||||||
|
|
Binary file not shown.
After Width: | Height: | Size: 27 KiB |
|
@ -2,6 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
@ -20,29 +21,46 @@ skipif_nodeps = pytest.mark.skipif(NODEPS,
|
||||||
class TestIntegration:
|
class TestIntegration:
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def src_valid(self):
|
def src_valid_path(self):
|
||||||
return os.path.join(os.getcwd(), 'tests/src_valid')
|
return os.path.join(os.getcwd(), 'tests/src_valid')
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def src_invalid(self):
|
def src_invalid_path(self):
|
||||||
return os.path.join(os.getcwd(), 'tests/src_invalid')
|
return os.path.join(os.getcwd(), 'tests/src_invalid')
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def dst(self):
|
def dst(self):
|
||||||
return os.path.join(os.getcwd(), 'tests/dst')
|
return os.path.join(os.getcwd(), 'tests/dst')
|
||||||
|
|
||||||
def test_filecheck(self, src_invalid, dst):
|
def test_filecheck_src_invalid(self, src_invalid_path):
|
||||||
groomer = KittenGroomerFileCheck(src_invalid, dst, debug=True)
|
dst_path = self.make_dst_dir_path(src_invalid_path)
|
||||||
groomer.processdir()
|
groomer = KittenGroomerFileCheck(src_invalid_path, dst_path, debug=True)
|
||||||
|
groomer.run()
|
||||||
test_description = "filecheck_invalid"
|
test_description = "filecheck_invalid"
|
||||||
save_logs(groomer, test_description)
|
save_logs(groomer, test_description)
|
||||||
|
|
||||||
def test_filecheck_2(self, src_valid, dst):
|
def test_filecheck_2(self, src_valid_path):
|
||||||
groomer = KittenGroomerFileCheck(src_valid, dst, debug=True)
|
dst_path = self.make_dst_dir_path(src_valid_path)
|
||||||
groomer.processdir()
|
groomer = KittenGroomerFileCheck(src_valid_path, dst_path, debug=True)
|
||||||
|
groomer.run()
|
||||||
test_description = "filecheck_valid"
|
test_description = "filecheck_valid"
|
||||||
save_logs(groomer, test_description)
|
save_logs(groomer, test_description)
|
||||||
|
|
||||||
|
def test_processdir(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def test_handle_archives(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def make_dst_dir_path(self, src_dir_path):
|
||||||
|
dst_path = src_dir_path + '_dst'
|
||||||
|
shutil.rmtree(dst_path, ignore_errors=True)
|
||||||
|
os.makedirs(dst_path, exist_ok=True)
|
||||||
|
return dst_path
|
||||||
|
|
||||||
|
|
||||||
class TestFileHandling:
|
class TestFileHandling:
|
||||||
|
def test_autorun(self):
|
||||||
|
# Run on a single autorun file, confirm that it gets flagged as dangerous
|
||||||
|
# TODO: build out these and other methods for individual file cases
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -5,7 +5,7 @@ import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from kittengroomer import FileBase, KittenGroomerBase
|
from kittengroomer import FileBase, KittenGroomerBase, GroomerLogger
|
||||||
from kittengroomer.helpers import ImplementationRequired
|
from kittengroomer.helpers import ImplementationRequired
|
||||||
|
|
||||||
skip = pytest.mark.skip
|
skip = pytest.mark.skip
|
||||||
|
@ -30,7 +30,7 @@ class TestFileBase:
|
||||||
return FileBase(source_file, dest_file)
|
return FileBase(source_file, dest_file)
|
||||||
|
|
||||||
@fixture
|
@fixture
|
||||||
def symlink(self, tmpdir):
|
def symlink_file(self, tmpdir):
|
||||||
file_path = tmpdir.join('test.txt')
|
file_path = tmpdir.join('test.txt')
|
||||||
file_path.write('testing')
|
file_path.write('testing')
|
||||||
file_path = file_path.strpath
|
file_path = file_path.strpath
|
||||||
|
@ -65,7 +65,7 @@ class TestFileBase:
|
||||||
|
|
||||||
@fixture
|
@fixture
|
||||||
def file_marked_binary(self, generic_conf_file):
|
def file_marked_binary(self, generic_conf_file):
|
||||||
generic_conf_file.mark_binary()
|
generic_conf_file.make_binary()
|
||||||
return generic_conf_file
|
return generic_conf_file
|
||||||
|
|
||||||
@fixture(params=[
|
@fixture(params=[
|
||||||
|
@ -81,27 +81,17 @@ class TestFileBase:
|
||||||
# What should FileBase do if it's given a path that isn't a file (doesn't exist or is a dir)? Currently magic throws an exception
|
# What should FileBase do if it's given a path that isn't a file (doesn't exist or is a dir)? Currently magic throws an exception
|
||||||
# We should probably catch everytime that happens and tell the user explicitly happened (and maybe put it in the log)
|
# We should probably catch everytime that happens and tell the user explicitly happened (and maybe put it in the log)
|
||||||
|
|
||||||
def test_create(self):
|
|
||||||
file = FileBase('tests/src_valid/blah.conf', '/tests/dst/blah.conf')
|
|
||||||
|
|
||||||
def test_create_broken(self, tmpdir):
|
def test_create_broken(self, tmpdir):
|
||||||
with pytest.raises(TypeError):
|
with pytest.raises(TypeError):
|
||||||
file_no_args = FileBase()
|
FileBase()
|
||||||
with pytest.raises(FileNotFoundError):
|
with pytest.raises(FileNotFoundError):
|
||||||
file_empty_args = FileBase('', '')
|
FileBase('', '')
|
||||||
with pytest.raises(IsADirectoryError):
|
with pytest.raises(IsADirectoryError):
|
||||||
file_directory = FileBase(tmpdir.strpath, tmpdir.strpath)
|
FileBase(tmpdir.strpath, tmpdir.strpath)
|
||||||
# are there other cases here? path to a file that doesn't exist? permissions?
|
# TODO: are there other cases here? path to a file that doesn't exist? permissions?
|
||||||
|
|
||||||
def test_init(self, generic_conf_file):
|
def test_init(self, generic_conf_file):
|
||||||
file = generic_conf_file
|
generic_conf_file
|
||||||
assert file.log_details
|
|
||||||
assert file.log_details['filepath'] == file.src_path
|
|
||||||
assert file.extension == '.conf'
|
|
||||||
copied_log = file.log_details.copy()
|
|
||||||
file.log_details = ''
|
|
||||||
# assert file.log_details == copied_log # this fails for now, we need to make log_details undeletable
|
|
||||||
# we should probably check for more extensions here
|
|
||||||
|
|
||||||
def test_extension_uppercase(self, tmpdir):
|
def test_extension_uppercase(self, tmpdir):
|
||||||
file_path = tmpdir.join('TEST.TXT')
|
file_path = tmpdir.join('TEST.TXT')
|
||||||
|
@ -111,43 +101,42 @@ class TestFileBase:
|
||||||
assert file.extension == '.txt'
|
assert file.extension == '.txt'
|
||||||
|
|
||||||
def test_mimetypes(self, generic_conf_file):
|
def test_mimetypes(self, generic_conf_file):
|
||||||
assert generic_conf_file.has_mimetype()
|
|
||||||
assert generic_conf_file.mimetype == 'text/plain'
|
assert generic_conf_file.mimetype == 'text/plain'
|
||||||
assert generic_conf_file.main_type == 'text'
|
assert generic_conf_file.main_type == 'text'
|
||||||
assert generic_conf_file.sub_type == 'plain'
|
assert generic_conf_file.sub_type == 'plain'
|
||||||
|
assert generic_conf_file.has_mimetype
|
||||||
# Need to test something without a mimetype
|
# Need to test something without a mimetype
|
||||||
# Need to test something that's a directory
|
# Need to test something that's a directory
|
||||||
# Need to test something that causes the unicode exception
|
# Need to test something that causes the unicode exception
|
||||||
|
|
||||||
def test_has_mimetype_no_main_type(self, generic_conf_file):
|
def test_has_mimetype_no_main_type(self, generic_conf_file):
|
||||||
generic_conf_file.main_type = ''
|
generic_conf_file.main_type = ''
|
||||||
assert generic_conf_file.has_mimetype() is False
|
assert generic_conf_file.has_mimetype is False
|
||||||
|
|
||||||
def test_has_mimetype_no_sub_type(self, generic_conf_file):
|
def test_has_mimetype_no_sub_type(self, generic_conf_file):
|
||||||
generic_conf_file.sub_type = ''
|
generic_conf_file.sub_type = ''
|
||||||
assert generic_conf_file.has_mimetype() is False
|
assert generic_conf_file.has_mimetype is False
|
||||||
|
|
||||||
def test_has_extension(self, temp_file, temp_file_no_ext):
|
def test_has_extension(self, temp_file, temp_file_no_ext):
|
||||||
assert temp_file.has_extension() is True
|
assert temp_file.has_extension is True
|
||||||
assert temp_file_no_ext.has_extension() is False
|
print(temp_file_no_ext.extension)
|
||||||
assert temp_file_no_ext.log_details.get('no_extension') is True
|
assert temp_file_no_ext.has_extension is False
|
||||||
|
|
||||||
def test_add_log_details(self, generic_conf_file):
|
def test_set_property(self, generic_conf_file):
|
||||||
generic_conf_file.add_log_details('test', True)
|
generic_conf_file.set_property('test', True)
|
||||||
assert generic_conf_file.log_details['test'] is True
|
assert generic_conf_file.get_property('test') is True
|
||||||
with pytest.raises(KeyError):
|
assert generic_conf_file.get_property('wrong') is None
|
||||||
assert generic_conf_file.log_details['wrong'] is False
|
|
||||||
|
|
||||||
def test_marked_dangerous(self, file_marked_all_parameterized):
|
def test_marked_dangerous(self, file_marked_all_parameterized):
|
||||||
file_marked_all_parameterized.make_dangerous()
|
file_marked_all_parameterized.make_dangerous()
|
||||||
assert file_marked_all_parameterized.is_dangerous() is True
|
assert file_marked_all_parameterized.is_dangerous is True
|
||||||
# Should work regardless of weird paths??
|
# Should work regardless of weird paths??
|
||||||
# Should check file path alteration behavior as well
|
# Should check file path alteration behavior as well
|
||||||
|
|
||||||
def test_generic_dangerous(self, generic_conf_file):
|
def test_generic_dangerous(self, generic_conf_file):
|
||||||
assert generic_conf_file.is_dangerous() is False
|
assert generic_conf_file.is_dangerous is False
|
||||||
generic_conf_file.make_dangerous()
|
generic_conf_file.make_dangerous()
|
||||||
assert generic_conf_file.is_dangerous() is True
|
assert generic_conf_file.is_dangerous is True
|
||||||
|
|
||||||
def test_has_symlink(self, tmpdir):
|
def test_has_symlink(self, tmpdir):
|
||||||
file_path = tmpdir.join('test.txt')
|
file_path = tmpdir.join('test.txt')
|
||||||
|
@ -155,64 +144,88 @@ class TestFileBase:
|
||||||
file_path = file_path.strpath
|
file_path = file_path.strpath
|
||||||
symlink_path = tmpdir.join('symlinked.txt')
|
symlink_path = tmpdir.join('symlinked.txt')
|
||||||
symlink_path = symlink_path.strpath
|
symlink_path = symlink_path.strpath
|
||||||
file_symlink = os.symlink(file_path, symlink_path)
|
os.symlink(file_path, symlink_path)
|
||||||
file = FileBase(file_path, file_path)
|
file = FileBase(file_path, file_path)
|
||||||
symlink = FileBase(symlink_path, symlink_path)
|
symlink = FileBase(symlink_path, symlink_path)
|
||||||
assert file.is_symlink() is False
|
assert file.is_symlink is False
|
||||||
assert symlink.is_symlink() is True
|
assert symlink.is_symlink is True
|
||||||
|
|
||||||
def test_has_symlink_fixture(self, symlink):
|
def test_has_symlink_fixture(self, symlink_file):
|
||||||
assert symlink.is_symlink() is True
|
assert symlink_file.is_symlink is True
|
||||||
|
|
||||||
def test_generic_make_unknown(self, generic_conf_file):
|
def test_generic_make_unknown(self, generic_conf_file):
|
||||||
assert generic_conf_file.log_details.get('unknown') is None
|
assert generic_conf_file.is_unknown is False
|
||||||
generic_conf_file.make_unknown()
|
generic_conf_file.make_unknown()
|
||||||
assert generic_conf_file.log_details.get('unknown') is True
|
assert generic_conf_file.is_unknown
|
||||||
# given a FileBase object with no marking, should do the right things
|
# given a FileBase object with no marking, should do the right things
|
||||||
|
|
||||||
def test_marked_make_unknown(self, file_marked_all_parameterized):
|
def test_marked_make_unknown(self, file_marked_all_parameterized):
|
||||||
file = file_marked_all_parameterized
|
file = file_marked_all_parameterized
|
||||||
if file.log_details.get('unknown'):
|
if file.is_unknown:
|
||||||
file.make_unknown()
|
file.make_unknown()
|
||||||
assert file.log_details.get('unknown') is True
|
assert file.is_unknown
|
||||||
else:
|
else:
|
||||||
assert file.log_details.get('unknown') is None
|
assert file.is_unknown is False
|
||||||
file.make_unknown()
|
file.make_unknown()
|
||||||
assert file.log_details.get('unknown') is None
|
assert file.is_unknown is False
|
||||||
# given a FileBase object with an unrecognized marking, should ???
|
# given a FileBase object with an unrecognized marking, should ???
|
||||||
|
|
||||||
def test_generic_make_binary(self, generic_conf_file):
|
def test_generic_make_binary(self, generic_conf_file):
|
||||||
assert generic_conf_file.log_details.get('binary') is None
|
assert generic_conf_file.is_binary is False
|
||||||
generic_conf_file.make_binary()
|
generic_conf_file.make_binary()
|
||||||
assert generic_conf_file.log_details.get('binary') is True
|
assert generic_conf_file.is_binary
|
||||||
|
|
||||||
def test_marked_make_binary(self, file_marked_all_parameterized):
|
def test_marked_make_binary(self, file_marked_all_parameterized):
|
||||||
file = file_marked_all_parameterized
|
file = file_marked_all_parameterized
|
||||||
if file.log_details.get('dangerous'):
|
if file.is_dangerous:
|
||||||
file.make_binary()
|
file.make_binary()
|
||||||
assert file.log_details.get('binary') is None
|
assert file.is_binary is False
|
||||||
else:
|
else:
|
||||||
file.make_binary()
|
file.make_binary()
|
||||||
assert file.log_details.get('binary') is True
|
assert file.is_binary
|
||||||
|
|
||||||
def test_force_ext_change(self, generic_conf_file):
|
def test_force_ext_change(self, generic_conf_file):
|
||||||
assert generic_conf_file.has_extension()
|
assert generic_conf_file.has_extension
|
||||||
assert generic_conf_file.extension == '.conf'
|
assert generic_conf_file.get_property('extension') == '.conf'
|
||||||
assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
|
assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
|
||||||
generic_conf_file.force_ext('.txt')
|
generic_conf_file.force_ext('.txt')
|
||||||
assert os.path.splitext(generic_conf_file.dst_path)[1] == '.txt'
|
assert os.path.splitext(generic_conf_file.dst_path)[1] == '.txt'
|
||||||
assert generic_conf_file.log_details.get('force_ext') is True
|
assert generic_conf_file.get_property('force_ext') is True
|
||||||
# should make a file's extension change
|
assert generic_conf_file.get_property('extension') == '.txt'
|
||||||
# should be able to handle weird paths
|
# should be able to handle weird paths
|
||||||
|
|
||||||
def test_force_ext_correct(self, generic_conf_file):
|
def test_force_ext_correct(self, generic_conf_file):
|
||||||
assert generic_conf_file.has_extension()
|
assert generic_conf_file.has_extension
|
||||||
assert generic_conf_file.extension == '.conf'
|
assert generic_conf_file.get_property('extension') == '.conf'
|
||||||
generic_conf_file.force_ext('.conf')
|
generic_conf_file.force_ext('.conf')
|
||||||
assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
|
assert os.path.splitext(generic_conf_file.dst_path)[1] == '.conf'
|
||||||
assert generic_conf_file.log_details.get('force_ext') is None
|
assert generic_conf_file.get_property('force_ext') is None
|
||||||
# shouldn't change a file's extension if it already is right
|
# shouldn't change a file's extension if it already is right
|
||||||
|
|
||||||
|
def test_create_metadata_file(self, temp_file):
|
||||||
|
# Try making a metadata file
|
||||||
|
metadata_file_path = temp_file.create_metadata_file('.metadata.txt')
|
||||||
|
with open(metadata_file_path, 'w+') as metadata_file:
|
||||||
|
metadata_file.write('Have some metadata!')
|
||||||
|
# Shouldn't be able to make a metadata file with no extension
|
||||||
|
assert temp_file.create_metadata_file('') is False
|
||||||
|
# if metadata file already exists
|
||||||
|
# if there is no metadata to write should this work?
|
||||||
|
|
||||||
|
def test_safe_copy(self, generic_conf_file):
|
||||||
|
generic_conf_file.safe_copy()
|
||||||
|
# check that safe copy can handle weird file path inputs
|
||||||
|
|
||||||
|
|
||||||
|
class TestLogger:
|
||||||
|
|
||||||
|
@fixture
|
||||||
|
def generic_logger(self, tmpdir):
|
||||||
|
return GroomerLogger(tmpdir.strpath)
|
||||||
|
|
||||||
|
def test_tree(self, generic_logger):
|
||||||
|
generic_logger.tree(generic_logger.root_dir)
|
||||||
|
|
||||||
|
|
||||||
class TestKittenGroomerBase:
|
class TestKittenGroomerBase:
|
||||||
|
|
||||||
|
@ -236,39 +249,6 @@ class TestKittenGroomerBase:
|
||||||
debug_groomer = KittenGroomerBase(source_directory,
|
debug_groomer = KittenGroomerBase(source_directory,
|
||||||
dest_directory,
|
dest_directory,
|
||||||
debug=True)
|
debug=True)
|
||||||
# we should maybe protect access to self.current_file in some way?
|
|
||||||
|
|
||||||
def test_computehash(self, tmpdir):
|
|
||||||
file = tmpdir.join('test.txt')
|
|
||||||
file.write('testing')
|
|
||||||
simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath)
|
|
||||||
simple_groomer._computehash(file.strpath)
|
|
||||||
|
|
||||||
def test_tree(self, generic_groomer):
|
|
||||||
generic_groomer.tree(generic_groomer.src_root_dir)
|
|
||||||
|
|
||||||
def test_safe_copy(self, tmpdir):
|
|
||||||
file = tmpdir.join('test.txt')
|
|
||||||
file.write('testing')
|
|
||||||
testdir = tmpdir.join('testdir')
|
|
||||||
os.mkdir(testdir.strpath)
|
|
||||||
filedest = testdir.join('test.txt')
|
|
||||||
simple_groomer = KittenGroomerBase(tmpdir.strpath, testdir.strpath)
|
|
||||||
simple_groomer.cur_file = FileBase(file.strpath, filedest.strpath)
|
|
||||||
assert simple_groomer._safe_copy() is True
|
|
||||||
#check that it handles weird file path inputs
|
|
||||||
|
|
||||||
def test_safe_metadata_split(self, tmpdir):
|
|
||||||
file = tmpdir.join('test.txt')
|
|
||||||
file.write('testing')
|
|
||||||
simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath)
|
|
||||||
simple_groomer.cur_file = FileBase(file.strpath, file.strpath)
|
|
||||||
metadata_file = simple_groomer._safe_metadata_split('metadata.log')
|
|
||||||
metadata_file.write('Have some metadata!')
|
|
||||||
metadata_file.close()
|
|
||||||
assert simple_groomer._safe_metadata_split('') is False
|
|
||||||
# if metadata file already exists
|
|
||||||
# if there is no metadata to write should this work?
|
|
||||||
|
|
||||||
def test_list_all_files(self, tmpdir):
|
def test_list_all_files(self, tmpdir):
|
||||||
file = tmpdir.join('test.txt')
|
file = tmpdir.join('test.txt')
|
||||||
|
@ -276,15 +256,6 @@ class TestKittenGroomerBase:
|
||||||
testdir = tmpdir.join('testdir')
|
testdir = tmpdir.join('testdir')
|
||||||
os.mkdir(testdir.strpath)
|
os.mkdir(testdir.strpath)
|
||||||
simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath)
|
simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath)
|
||||||
files = simple_groomer._list_all_files(simple_groomer.src_root_dir)
|
files = simple_groomer.list_all_files(simple_groomer.src_root_dir)
|
||||||
assert file.strpath in files
|
assert file.strpath in files
|
||||||
assert testdir.strpath not in files
|
assert testdir.strpath not in files
|
||||||
|
|
||||||
def test_print_log(self, generic_groomer):
|
|
||||||
with pytest.raises(AttributeError):
|
|
||||||
generic_groomer._print_log()
|
|
||||||
# Kind of a bad test, but this should be implemented by the user anyway
|
|
||||||
|
|
||||||
def test_processdir(self, generic_groomer):
|
|
||||||
with pytest.raises(ImplementationRequired):
|
|
||||||
generic_groomer.processdir()
|
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
src_invalid
|
||||||
|
===========
|
||||||
|
|
||||||
|
-
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
src_valid
|
||||||
|
=========
|
||||||
|
|
||||||
|
- Example.jpg: image/jpeg, obtained from wikipedia.org
|
||||||
|
- blah.conf: text file with a .conf extension
|
Loading…
Reference in New Issue