2015-11-05 23:15:07 +01:00
|
|
|
[![Build Status](https://travis-ci.org/CIRCL/PyCIRCLean.svg?branch=master)](https://travis-ci.org/CIRCL/PyCIRCLean)
|
2016-01-29 15:53:59 +01:00
|
|
|
[![codecov.io](https://codecov.io/github/CIRCL/PyCIRCLean/coverage.svg?branch=master)](https://codecov.io/github/CIRCL/PyCIRCLean?branch=master)
|
2015-11-05 23:15:07 +01:00
|
|
|
|
2015-05-11 14:32:59 +02:00
|
|
|
# PyCIRCLean
|
2015-10-29 17:23:20 +01:00
|
|
|
|
2016-11-30 19:36:10 +01:00
|
|
|
PyCIRCLean is the core Python code used by [CIRCLean](https://github.com/CIRCL/Circlean/), an open-source
|
2017-02-08 18:54:40 +01:00
|
|
|
USB key and document sanitizer created by [CIRCL](https://www.circl.lu/). This module has been separated from the
|
|
|
|
device-specific scripts and can be used for dedicated security applications to sanitize documents from hostile environments
|
2017-03-16 03:53:14 +01:00
|
|
|
to trusted environments. PyCIRCLean is currently Python 3.3+ compatible.
|
2015-10-29 17:23:20 +01:00
|
|
|
|
|
|
|
# Installation
|
|
|
|
|
|
|
|
~~~
|
2015-10-29 17:41:19 +01:00
|
|
|
python setup.py install
|
2015-10-29 17:23:20 +01:00
|
|
|
~~~
|
|
|
|
|
2016-11-30 19:36:10 +01:00
|
|
|
OR
|
|
|
|
|
|
|
|
~~~
|
|
|
|
pip install .
|
|
|
|
~~~
|
|
|
|
|
2015-10-29 17:23:20 +01:00
|
|
|
# How to use PyCIRCLean
|
|
|
|
|
2017-02-08 19:03:19 +01:00
|
|
|
PyCIRCLean is a simple Python library to handle file checking and sanitization.
|
2017-03-16 03:53:14 +01:00
|
|
|
PyCIRCLean is designed to be extended to cover specific checking
|
2017-02-08 19:03:19 +01:00
|
|
|
and sanitization workflows in different organizations such as industrial
|
2016-11-30 19:36:10 +01:00
|
|
|
environments or restricted/classified ICT environments. A series of practical examples utilizing PyCIRCLean can be found
|
2017-03-16 03:53:14 +01:00
|
|
|
in the [./examples](./examples) directory. Note: for commits beyond version 2.2.0 these
|
|
|
|
examples are not guaranteed to work with the PyCIRCLean API. Please check [helpers.py](./kittengroomer/helpers.py) or
|
|
|
|
[filecheck.py](./bin/filecheck.py) to see the new API.
|
2015-10-29 17:23:20 +01:00
|
|
|
|
2016-11-30 19:36:10 +01:00
|
|
|
The following simple example using PyCIRCLean will only copy files with a .conf extension matching the 'text/plain' MIME
|
|
|
|
type. Any other file found in the source directory won't be copied to the destination directory.
|
2015-10-29 17:23:20 +01:00
|
|
|
|
|
|
|
~~~python
|
|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import os
|
|
|
|
import magic
|
|
|
|
|
|
|
|
from kittengroomer import FileBase, KittenGroomerBase, main
|
|
|
|
|
|
|
|
|
|
|
|
# Extension
|
2017-03-16 03:53:14 +01:00
|
|
|
class Config:
    """Static configuration for the example groomer."""

    # Maps an accepted file extension to the MIME type a file with that
    # extension must have.
    configfiles = {'.conf': 'text/plain'}
|
2015-10-29 17:23:20 +01:00
|
|
|
|
|
|
|
|
|
|
|
class FileSpec(FileBase):
    """A single file checked against the extension/mimetype pairs in Config."""

    def __init__(self, src_path, dst_path):
        """Init file object, set the extension and the mimetype.

        :param src_path: path of the file to examine
        :param dst_path: path the file is copied to when it is accepted
        """
        super(FileSpec, self).__init__(src_path, dst_path)
        _, self.extension = os.path.splitext(self.src_path)
        mimetype = magic.from_file(self.src_path, mime=True)
        # magic.from_file may hand back bytes or str depending on the
        # installed python-magic; only decode when it really is bytes.
        if isinstance(mimetype, bytes):
            mimetype = mimetype.decode("utf-8")
        self.mimetype = mimetype
        # The initial version will only accept the file extensions/mimetypes listed here.
        self.valid_files = dict(Config.configfiles)

    def check(self):
        """Validate the file, log the outcome and copy it when it is allowed.

        A file is valid when its extension is a key of ``valid_files`` and its
        detected mimetype equals the value stored for that extension.
        """
        expected_mime = self.valid_files.get(self.extension)
        # Holds the rejection message; stays None while the file is acceptable.
        # Using one variable avoids reading a name that was never assigned
        # when only one of the two checks fails.
        failure = None
        if expected_mime is None:
            # Unexpected extension => disallowed
            failure = 'Extension: {} - Expected: {}'.format(self.extension, ', '.join(self.valid_files))
        elif self.mimetype != expected_mime:
            # Unexpected mimetype => disallowed
            failure = 'Mime: {} - Expected: {}'.format(self.mimetype, expected_mime)

        valid = failure is None
        self.add_log_details('valid', valid)
        if valid:
            self.add_log_string('Extension: {} - MimeType: {}'.format(self.extension, self.mimetype))
        else:
            self.should_copy = False
            self.add_log_string(failure)

        # NOTE(review): should_copy is not initialised in this class; it is
        # presumably set by FileBase - confirm against kittengroomer/helpers.py.
        if self.should_copy:
            self.safe_copy()
        self.write_log()
|
2015-10-29 17:23:20 +01:00
|
|
|
|
|
|
|
|
|
|
|
class KittenGroomerSpec(KittenGroomerBase):
    """Groomer that runs every file below the source root through FileSpec."""

    def __init__(self, root_src=None, root_dst=None):
        """Initialize the basics of the copy.

        :param root_src: source root; when None, 'media/src' under the
            filesystem root is used
        :param root_dst: destination root; when None, 'media/dst' under the
            filesystem root is used
        """
        if root_src is None:
            root_src = os.path.join(os.sep, 'media', 'src')
        if root_dst is None:
            root_dst = os.path.join(os.sep, 'media', 'dst')
        super(KittenGroomerSpec, self).__init__(root_src, root_dst)

    def processdir(self):
        """Main function doing the processing."""
        for srcpath in self._list_all_files(self.src_root_dir):
            # Swap only the first occurrence of the source root so a path that
            # happens to contain the same text further down is left intact.
            # Assumes every listed path starts with src_root_dir - TODO confirm
            # against KittenGroomerBase._list_all_files.
            dstpath = srcpath.replace(self.src_root_dir, self.dst_root_dir, 1)
            cur_file = FileSpec(srcpath, dstpath)
            cur_file.check()
|
2015-10-29 17:23:20 +01:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Pass the groomer class to the kittengroomer entry point; the second
    # argument is presumably the command-line description - confirm against
    # kittengroomer.main.
    main(KittenGroomerSpec, ' Only copy some files, returns an error if anything else is found')
|
2017-03-16 03:53:14 +01:00
|
|
|
|
2015-10-29 17:23:20 +01:00
|
|
|
~~~
|
|
|
|
|
2015-10-29 17:34:52 +01:00
|
|
|
# How to contribute
|
|
|
|
|
2017-02-08 19:03:19 +01:00
|
|
|
We welcome contributions (including bug fixes, new example file processing
|
|
|
|
workflows) via pull requests. We are particularly interested in any new workflows
|
|
|
|
that can be used to improve security in different organizations. If you see any
|
|
|
|
potential enhancements required to support your sanitization workflow, please feel
|
|
|
|
free to open an issue. Read [CONTRIBUTING.md](/CONTRIBUTING.md) for more
|
|
|
|
information.
|
2015-10-29 17:34:52 +01:00
|
|
|
|
2015-10-29 17:23:20 +01:00
|
|
|
|
|
|
|
# License
|
|
|
|
|
2015-10-29 17:25:45 +01:00
|
|
|
~~~
|
2015-10-29 17:23:20 +01:00
|
|
|
Copyright (C) 2013-2015 Raphaël Vinot
|
|
|
|
Copyright (C) 2013-2015 CIRCL - Computer Incident Response Center Luxembourg (℅ smile gie)
|
|
|
|
All rights reserved.
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer in the
|
|
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
* Neither the name of the organization nor the names of its contributors
|
|
|
|
may be used to endorse or promote products derived from this software
|
|
|
|
without specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
|
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
|
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER BE LIABLE FOR ANY
|
|
|
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
|
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
|
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
|
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
|
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
2015-10-29 17:25:45 +01:00
|
|
|
~~~
|