diff --git a/.gitignore b/.gitignore index 95e49f2..ecf6be3 100644 --- a/.gitignore +++ b/.gitignore @@ -67,8 +67,8 @@ target/ *.vrb # Project specific -/tests/dst/* -!/tests/logs/ -!/tests/.keepdir - - +tests/dst/* +tests/test_logs/* +!tests/**/.keepdir +!tests/src_invalid/* +!tests/src_valid/* diff --git a/.travis.yml b/.travis.yml index 8866a29..8e3dfb7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,8 +17,6 @@ addons: packages: # General dependencies - p7zip-full - # generic.py dependencies - - ghostscript # Testing dependencies - mercurial @@ -26,21 +24,7 @@ install: # General dependencies - sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu/ trusty multiverse" && sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu/ trusty-updates multiverse" - sudo apt-get update -qq - - sudo apt-get install -y p7zip-rar - # generic.py: pdf2htmlEX + dependencies - - sudo add-apt-repository ppa:fontforge/fontforge --yes - # to get a working 0.26 poppler - - sudo add-apt-repository ppa:delayargentina/delayx --yes - - sudo apt-get update -qq - - sudo apt-get install -y libpoppler-dev libpoppler-private-dev libspiro-dev libcairo-dev libpango1.0-dev libfreetype6-dev libltdl-dev libfontforge-dev python-imaging python-pip firefox xvfb - - git clone https://github.com/coolwanglu/pdf2htmlEX.git - - pushd pdf2htmlEX - - cmake -DCMAKE_INSTALL_PREFIX:PATH=/usr -DENABLE_SVG=ON . 
- - make - - sudo make install - - popd - # generic.py: Other dependencies - - sudo apt-get install -y libreoffice libreoffice-script-provider-python unoconv + - sudo apt-get install -y p7zip-rar python-pip # filecheck.py dependencies - sudo apt-get install libxml2-dev libxslt1-dev - wget https://didierstevens.com/files/software/pdfid_v0_2_1.zip @@ -82,7 +66,7 @@ install: - wget --no-check-certificate https://www.officedissector.com/corpus/fraunhoferlibrary.zip - unzip -o fraunhoferlibrary.zip - rm fraunhoferlibrary.zip - - 7z x 42.zip -p42 + - 7z x -p42 42.zip - wget http://www.sample-videos.com/audio/mp3/india-national-anthem.mp3 - wget http://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4 - wget http://thewalter.net/stef/software/rtfx/sample.rtf diff --git a/bin/README.md b/bin/README.md index b910162..f509f34 100644 --- a/bin/README.md +++ b/bin/README.md @@ -1,25 +1,18 @@ -Examples -======== - -These are several sanitizers that demonstrate PyCIRCLean's capabilities. Feel free to -adapt or modify any of them to suit your requirements. In order to use any of these scripts, -you will first need to install the PyCIRCLean dependencies (preferably in a virtualenv): - -``` - pip install . -``` - -Requirements per script -======================= - filecheck.py ------------- +============ This is the script used by the [CIRCLean](https://github.com/CIRCL/Circlean) USB key sanitizer. It is designed to handle a range of file types, and will mark them as dangerous if they meet certain criteria. -Requirements by type of document: +Before installing the filecheck.py dependencies, make sure to install the PyCIRCLean +dependencies: + +``` + pip install . +``` + +Dependencies by type of document: * Microsoft office: oletools, olefile * OOXML: officedissector * PDF: pdfid @@ -38,47 +31,3 @@ manually in the directory where filecheck will be run. 
wget https://didierstevens.com/files/software/pdfid_v0_2_1.zip unzip pdfid_v0_2_1.zip ``` - -generic.py ----------- - -This is a script used by an older version of CIRCLean. It has more dependencies -than filecheck.py and they are more complicated to install. - -Requirements by type of document: -* Office and all text files: unoconv, libreoffice -* PDF: ghostscript, pdf2htmlEX - -``` - # required for pdf2htmlEX - sudo add-apt-repository ppa:fontforge/fontforge --yes - sudo add-apt-repository ppa:coolwanglu/pdf2htmlex --yes - sudo apt-get update -qq - sudo apt-get install -qq libpoppler-dev libpoppler-private-dev libspiro-dev libcairo-dev libpango1.0-dev libfreetype6-dev libltdl-dev libfontforge-dev python-imaging python-pip firefox xvfb - # install pdf2htmlEX - git clone https://github.com/coolwanglu/pdf2htmlEX.git - pushd pdf2htmlEX - cmake -DCMAKE_INSTALL_PREFIX:PATH=/usr -DENABLE_SVG=ON . - make - sudo make install - popd - # Installing the rest - sudo apt-get install ghostscript p7zip-full p7zip-rar libreoffice unoconv -``` - -pier9.py --------- - -This script has a list of file formats for various brands of industrial -manufacturing equipment, such as 3d printers, CNC machines, etc. It only -copies files that match these file formats. - -No external dependencies required. - -specific.py ------------ - -As the name suggests, this script copies only specific file formats according -to the configuration provided by the user. - -No external dependencies required. diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..933e1d9 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,56 @@ +Examples +======== + +These are several sanitizers that demonstrate PyCIRCLean's capabilities. Feel free to +adapt or modify any of them to suit your requirements. In order to use any of these scripts, +you will first need to install the PyCIRCLean dependencies (preferably in a virtualenv): + +``` + pip install . 
+``` + +Requirements per script +======================= + +generic.py +---------- + +This is a script that was used by an older version of CIRCLean. + +Requirements by type of document: +* Office and all text files: unoconv, libreoffice +* PDF: ghostscript, pdf2htmlEX + +``` + # required for pdf2htmlEX + sudo add-apt-repository ppa:fontforge/fontforge --yes + sudo add-apt-repository ppa:coolwanglu/pdf2htmlex --yes + sudo apt-get update -qq + sudo apt-get install -qq libpoppler-dev libpoppler-private-dev libspiro-dev libcairo-dev libpango1.0-dev libfreetype6-dev libltdl-dev libfontforge-dev python-imaging python-pip firefox xvfb + # install pdf2htmlEX + git clone https://github.com/coolwanglu/pdf2htmlEX.git + pushd pdf2htmlEX + cmake -DCMAKE_INSTALL_PREFIX:PATH=/usr -DENABLE_SVG=ON . + make + sudo make install + popd + # Installing the rest + sudo apt-get install ghostscript p7zip-full p7zip-rar libreoffice unoconv +``` + +pier9.py +-------- + +This script contains a list of file formats for various brands of industrial +manufacturing equipment, such as 3d printers, CNC machines, etc. It only +copies files that match these file formats. + +No external dependencies required. + +specific.py +----------- + +As the name suggests, this script copies only specific file formats according +to the configuration provided by the user. + +No external dependencies required. 
diff --git a/bin/generic.py b/examples/generic.py similarity index 100% rename from bin/generic.py rename to examples/generic.py diff --git a/bin/pier9.py b/examples/pier9.py similarity index 100% rename from bin/pier9.py rename to examples/pier9.py diff --git a/bin/specific.py b/examples/specific.py similarity index 100% rename from bin/specific.py rename to examples/specific.py diff --git a/setup.py b/setup.py index f20da6a..7f84998 100644 --- a/setup.py +++ b/setup.py @@ -12,9 +12,6 @@ setup( description='Standalone CIRCLean/KittenGroomer code.', packages=['kittengroomer'], scripts=[ - 'bin/generic.py', - 'bin/pier9.py', - 'bin/specific.py', 'bin/filecheck.py' ], include_package_data=True, diff --git a/tests/src_invalid/42.zip b/tests/src_invalid/42.zip new file mode 100644 index 0000000..e768153 Binary files /dev/null and b/tests/src_invalid/42.zip differ diff --git a/tests/src_invalid/blah.zip b/tests/src_invalid/blah.zip new file mode 100644 index 0000000..3e809f4 Binary files /dev/null and b/tests/src_invalid/blah.zip differ diff --git a/tests/test_generic.py b/tests/test_generic.py deleted file mode 100644 index a17fb5e..0000000 --- a/tests/test_generic.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import os - -import pytest - -from bin.generic import KittenGroomer, File, main -from tests.logging import save_logs - -skipif_nodeps = pytest.mark.skipif(os.path.exists('/usr/bin/unoconv') is False, - reason="Dependencies aren't installed") - - -@skipif_nodeps -class TestIntegration: - - @pytest.fixture - def src_valid(self): - return os.path.join(os.getcwd(), 'tests/src_valid') - - @pytest.fixture - def src_invalid(self): - return os.path.join(os.getcwd(), 'tests/src_invalid') - - @pytest.fixture - def dst(self): - return os.path.join(os.getcwd(), 'tests/dst') - - def test_generic(self, src_valid, dst): - groomer = KittenGroomer(src_valid, dst, debug=True) - groomer.processdir() - test_description = 'generic_valid' - 
save_logs(groomer, test_description) - - def test_generic_2(self, src_invalid, dst): - groomer = KittenGroomer(src_invalid, dst, debug=True) - groomer.processdir() - test_description = 'generic_invalid' - save_logs(groomer, test_description) - - -class TestFileHandling: - pass - - # We're going to give KittenGroomer a bunch of files, and it's going to process them - # Maybe we want to make a function that processdir delegates to? Or is it just the File Object that's responsible? - # Ideally we should be able to pass a path to a function and have it do stuff? And then we can test that function? - # So we have a function that takes a path and returns...log info? That makes sense actually. Or some sort of meta data - # The function could maybe be called processfile diff --git a/tests/test_specific_and_pier9.py b/tests/test_specific_and_pier9.py deleted file mode 100644 index d411aa8..0000000 --- a/tests/test_specific_and_pier9.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import os - -import pytest - -from bin.specific import KittenGroomerSpec -from bin.pier9 import KittenGroomerPier9 -from tests.logging import save_logs - - -@pytest.fixture -def src_valid(): - return os.path.join(os.getcwd(), 'tests/src_valid') - - -@pytest.fixture -def src_invalid(): - return os.path.join(os.getcwd(), 'tests/src_invalid') - - -@pytest.fixture -def dst(): - return os.path.join(os.getcwd(), 'tests/dst') - - -def test_specific_valid(src_valid, dst): - groomer = KittenGroomerSpec(src_valid, dst, debug=True) - groomer.processdir() - test_description = 'specific_valid' - save_logs(groomer, test_description) - - -def test_specific_invalid(src_invalid, dst): - groomer = KittenGroomerSpec(src_invalid, dst, debug=True) - groomer.processdir() - test_description = 'specific_invalid' - save_logs(groomer, test_description) - - -def test_pier9_valid(src_invalid, dst): - groomer = KittenGroomerPier9(src_invalid, dst, debug=True) - groomer.processdir() - 
test_description = 'pier9_valid' - save_logs(groomer, test_description) - - -def test_pier9_invalid(src_invalid, dst): - groomer = KittenGroomerPier9(src_invalid, dst, debug=True) - groomer.processdir() - test_description = 'pier9_invalid' - save_logs(groomer, test_description)