update python module

pull/33/head
Raphaël Vinot 2015-04-02 17:40:39 +02:00
parent 8cfe3c19b0
commit aabb0effc5
2 changed files with 15 additions and 7 deletions

View File

@ -10,6 +10,7 @@ import subprocess
import time import time
LIBREOFFICE = '/usr/bin/unoconv' LIBREOFFICE = '/usr/bin/unoconv'
PDF2HTMLEX = '/usr/bin/pdf2htmlEX' PDF2HTMLEX = '/usr/bin/pdf2htmlEX'
SEVENZ = '/usr/bin/7z' SEVENZ = '/usr/bin/7z'
@ -240,23 +241,27 @@ class KittenGroomer(object):
def _office_related(self): def _office_related(self):
self.cur_file.add_log_details('processing_type', 'office') self.cur_file.add_log_details('processing_type', 'office')
dst_path, filename = os.path.split(self.cur_file.dst_path) dst_dir, filename = os.path.split(self.cur_file.dst_path)
name, ext = os.path.splitext(filename) name, ext = os.path.splitext(filename)
tmpdir = os.path.join(dst_path, 'temp') tmpdir = os.path.join(dst_dir, 'temp')
tmppath = os.path.join(tmpdir, name, '.pdf') tmppath = os.path.join(tmpdir, name + '.pdf')
self._safe_mkdir(tmpdir) self._safe_mkdir(tmpdir)
lo_command = '{} --format pdf -eSelectPdfVersion=1 --output {} {}'.format( lo_command = '{} --format pdf -eSelectPdfVersion=1 --output {} {}'.format(
LIBREOFFICE, tmppath, self.cur_file.src_path) LIBREOFFICE, tmppath, self.cur_file.src_path)
self._run_process(lo_command) self._run_process(lo_command)
pdf_command = '{} --dest-dir=/ {} {}'.format(PDF2HTMLEX, tmppath, self.cur_file.dst_path) self.__pdf(tmppath)
self._run_process(pdf_command)
self._safe_rmtree(tmpdir) self._safe_rmtree(tmpdir)
def __pdf(self, tmpsrcpath):
pdf_command = '{} --dest-dir / {} {}'.format(PDF2HTMLEX, tmpsrcpath,
self.cur_file.dst_path + '.html')
self._run_process(pdf_command)
def _pdf(self): def _pdf(self):
self.cur_file.add_log_details('processing_type', 'pdf') self.cur_file.add_log_details('processing_type', 'pdf')
# FIXME: convert pdf to pdf/a if needed prior to converting to html
# TODO: Convert to pdf/A # TODO: Convert to pdf/A
pdf_command = '{} --dest-dir {} {}'.format(PDF2HTMLEX, self.cur_file.dst_path, self.cur_file.src_path) self.__pdf(self.cur_file.src_path)
self._run_process(pdf_command)
def _archive(self): def _archive(self):
self.cur_file.add_log_details('processing_type', 'archive') self.cur_file.add_log_details('processing_type', 'archive')

View File

@ -71,6 +71,9 @@ fi
# Groom da kitteh! # Groom da kitteh!
# Find the FS types
# lsblk -n -o name,fstype,mountpoint,label,uuid -r
PARTCOUNT=1 PARTCOUNT=1
for partition in ${DEV_PARTITIONS} for partition in ${DEV_PARTITIONS}
do do