From aabb0effc5e3c656e774f71b3dfc5e64f73c31d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Thu, 2 Apr 2015 17:40:39 +0200 Subject: [PATCH] update python module --- fs/opt/groomer/functions.py | 19 ++++++++++++------- fs/opt/groomer/groomer.sh | 3 +++ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/opt/groomer/functions.py b/fs/opt/groomer/functions.py index ff1fef0..5f5ca2d 100644 --- a/fs/opt/groomer/functions.py +++ b/fs/opt/groomer/functions.py @@ -10,6 +10,7 @@ import subprocess import time + LIBREOFFICE = '/usr/bin/unoconv' PDF2HTMLEX = '/usr/bin/pdf2htmlEX' SEVENZ = '/usr/bin/7z' @@ -240,23 +241,27 @@ class KittenGroomer(object): def _office_related(self): self.cur_file.add_log_details('processing_type', 'office') - dst_path, filename = os.path.split(self.cur_file.dst_path) + dst_dir, filename = os.path.split(self.cur_file.dst_path) name, ext = os.path.splitext(filename) - tmpdir = os.path.join(dst_path, 'temp') - tmppath = os.path.join(tmpdir, name, '.pdf') + tmpdir = os.path.join(dst_dir, 'temp') + tmppath = os.path.join(tmpdir, name + '.pdf') self._safe_mkdir(tmpdir) lo_command = '{} --format pdf -eSelectPdfVersion=1 --output {} {}'.format( LIBREOFFICE, tmppath, self.cur_file.src_path) self._run_process(lo_command) - pdf_command = '{} --dest-dir=/ {} {}'.format(PDF2HTMLEX, tmppath, self.cur_file.dst_path) - self._run_process(pdf_command) + self.__pdf(tmppath) self._safe_rmtree(tmpdir) + def __pdf(self, tmpsrcpath): + pdf_command = '{} --dest-dir / {} {}'.format(PDF2HTMLEX, tmpsrcpath, + self.cur_file.dst_path + '.html') + self._run_process(pdf_command) + def _pdf(self): self.cur_file.add_log_details('processing_type', 'pdf') + # FIXME: convert pdf to pdf/a if needed prior to converting to html # TODO: Convert to pdf/A - pdf_command = '{} --dest-dir {} {}'.format(PDF2HTMLEX, self.cur_file.dst_path, self.cur_file.src_path) - self._run_process(pdf_command) + self.__pdf(self.cur_file.src_path) def _archive(self): self.cur_file.add_log_details('processing_type', 'archive') diff --git a/fs/opt/groomer/groomer.sh b/fs/opt/groomer/groomer.sh index 596d479..1cd59d0 100755 --- a/fs/opt/groomer/groomer.sh +++ b/fs/opt/groomer/groomer.sh @@ -71,6 +71,9 @@ fi # Groom da kitteh! +# Find the FS types +# lsblk -n -o name,fstype,mountpoint,label,uuid -r + PARTCOUNT=1 for partition in ${DEV_PARTITIONS} do