diff --git a/README.md b/README.md index f0a0cf3..0f368ab 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ PyCIRCLean is the core Python code used by [CIRCLean](https://github.com/CIRCL/Circlean/), an open-source USB key and document sanitizer created by [CIRCL](https://www.circl.lu/). This module has been separated from the device-specific scripts and can be used for dedicated security applications to sanitize documents from hostile environments -to trusted environments. PyCIRCLean is currently Python 3.3+ only. +to trusted environments. PyCIRCLean is currently Python 3.3+ compatible. # Installation @@ -23,10 +23,12 @@ pip install . # How to use PyCIRCLean PyCIRCLean is a simple Python library to handle file checking and sanitization. -PyCIRCLean is designed to be overloaded and extended to cover specific checking +PyCIRCLean is designed to be extended to cover specific checking and sanitization workflows in different organizations such as industrial environments or restricted/classified ICT environments. A series of practical examples utilizing PyCIRCLean can be found -in the [./examples](./examples) directory. +in the [./examples](./examples) directory. Note: for commits beyond version 2.2.0 these +examples are not guaranteed to work with the PyCIRCLean API. Please check [helpers.py](./kittengroomer/helpers.py) or +[filecheck.py](./bin/filecheck.py) to see the new API. The following simple example using PyCIRCLean will only copy files with a .conf extension matching the 'text/plain' MIME type. If any other file is found in the source directory, the files won't be copied to the destination directory. 
@@ -41,87 +43,69 @@ from kittengroomer import FileBase, KittenGroomerBase, main # Extension -configfiles = {'.conf': 'text/plain'} +class Config: + configfiles = {'.conf': 'text/plain'} class FileSpec(FileBase): def __init__(self, src_path, dst_path): - ''' Init file object, set the extension ''' + """Init file object, set the extension.""" super(FileSpec, self).__init__(src_path, dst_path) + self.valid_files = {} a, self.extension = os.path.splitext(self.src_path) self.mimetype = magic.from_file(self.src_path, mime=True).decode("utf-8") + # The initial version will only accept the file extensions/mimetypes listed here. + self.valid_files.update(Config.configfiles) + + def check(self): + valid = True + expected_mime = self.valid_files.get(self.extension) + if expected_mime is None: + # Unexpected extension => disallowed + valid = False + compare_ext = 'Extension: {} - Expected: {}'.format(self.cur_file.extension, ', '.join(self.valid_files.keys())) + elif self.mimetype != expected_mime: + # Unexpected mimetype => disallowed + valid = False + compare_mime = 'Mime: {} - Expected: {}'.format(self.cur_file.mimetype, expected_mime) + self.add_log_details('valid', valid) + if valid: + self.cur_file.log_string = 'Extension: {} - MimeType: {}'.format(self.cur_file.extension, self.cur_file.mimetype) + else: + self.should_copy = False + if compare_ext is not None: + self.add_log_string(compare_ext) + else: + self.add_log_string(compare_mime) + if self.should_copy: + self.safe_copy() + self.write_log() class KittenGroomerSpec(KittenGroomerBase): def __init__(self, root_src=None, root_dst=None): - ''' - Initialize the basics of the copy - ''' + """Initialize the basics of the copy.""" if root_src is None: root_src = os.path.join(os.sep, 'media', 'src') if root_dst is None: root_dst = os.path.join(os.sep, 'media', 'dst') super(KittenGroomerSpec, self).__init__(root_src, root_dst) - self.valid_files = {} - - # The initial version will only accept the file extensions/mimetypes 
listed here. - self.valid_files.update(configfiles) - - def _print_log(self): - ''' - Print the logs related to the current file being processed - ''' - tmp_log = self.log_name.fields(**self.cur_file.log_details) - if not self.cur_file.log_details.get('valid'): - tmp_log.warning(self.cur_file.log_string) - else: - tmp_log.debug(self.cur_file.log_string) def processdir(self): - ''' - Main function doing the processing - ''' + """Main function doing the processing.""" to_copy = [] error = [] for srcpath in self._list_all_files(self.src_root_dir): - valid = True - self.log_name.info('Processing {}', srcpath.replace(self.src_root_dir + '/', '')) - self.cur_file = FileSpec(srcpath, srcpath.replace(self.src_root_dir, self.dst_root_dir)) - expected_mime = self.valid_files.get(self.cur_file.extension) - if expected_mime is None: - # Unexpected extension => disallowed - valid = False - compare_ext = 'Extension: {} - Expected: {}'.format(self.cur_file.extension, ', '.join(self.valid_files.keys())) - elif self.cur_file.mimetype != expected_mime: - # Unexpected mimetype => disallowed - valid = False - compare_mime = 'Mime: {} - Expected: {}'.format(self.cur_file.mimetype, expected_mime) - self.cur_file.add_log_details('valid', valid) - if valid: - to_copy.append(self.cur_file) - self.cur_file.log_string = 'Extension: {} - MimeType: {}'.format(self.cur_file.extension, self.cur_file.mimetype) - else: - error.append(self.cur_file) - if compare_ext is not None: - self.cur_file.log_string = compare_ext - else: - self.cur_file.log_string = compare_mime - if len(error) > 0: - for f in error + to_copy: - self.cur_file = f - self._print_log() - else: - for f in to_copy: - self.cur_file = f - self._safe_copy() - self._print_log() + dstpath = srcpath.replace(self.src_root_dir, self.dst_root_dir) + cur_file = FileSpec(srcpath, dstpath) + cur_file.check() if __name__ == '__main__': main(KittenGroomerSpec, ' Only copy some files, returns an error is anything else is found') - exit(0) + ~~~ 
# How to contribute diff --git a/setup.py b/setup.py index c11f64d..4397c5f 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ from setuptools import setup setup( name='kittengroomer', - version='2.1', + version='2.1.0', author='Raphaël Vinot', author_email='raphael.vinot@circl.lu', maintainer='Raphaël Vinot',