Flexibility ++

* Make it easier to implement different types of processing
* Add example for Pier9
* The generic version now converts pdf to pdf/a before HTML
pull/33/head
Raphaël Vinot 2015-04-03 17:45:48 +02:00
parent be71c85778
commit ce24b039d2
3 changed files with 190 additions and 82 deletions

View File

@ -2,13 +2,9 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import magic import magic
import os import os
import shutil
import mimetypes import mimetypes
from twiggy import quickSetup, log
import shlex
import subprocess
import time
from helpers import FileBase, KittenGroomerBase
LIBREOFFICE = '/usr/bin/unoconv' LIBREOFFICE = '/usr/bin/unoconv'
GS = '/usr/bin/gs' GS = '/usr/bin/gs'
@ -27,21 +23,15 @@ mimes_compressed = ['zip', 'x-rar', 'x-bzip2', 'x-lzip', 'x-lzma', 'x-lzop',
mimes_data = ['octet-stream'] mimes_data = ['octet-stream']
class File(object): class File(FileBase):
def __init__(self, src_path, dst_path, main_type, sub_type): def __init__(self, src_path, dst_path, main_type, sub_type):
self.src_path = src_path super(File, self).__init__(src_path, dst_path)
self.dst_path = dst_path
self.main_type = main_type self.main_type = main_type
self.sub_type = sub_type self.sub_type = sub_type
self.log_details = {'filepath': self.src_path, 'maintype': self.main_type, self.log_details.update({'maintype': self.main_type, 'subtype': self.sub_type})
'subtype': self.sub_type}
self.expected_mimetype, self.expected_extensions = self.crosscheck_mime() self.expected_mimetype, self.expected_extensions = self.crosscheck_mime()
self.is_recursive = False self.is_recursive = False
self.log_string = ''
def add_log_details(self, key, value):
self.log_details[key] = value
def crosscheck_mime(self): def crosscheck_mime(self):
# /usr/share/mime has interesting stuff # /usr/share/mime has interesting stuff
@ -69,39 +59,19 @@ class File(object):
actual_mimetype = '{}/{}'.format(self.main_type, self.sub_type) actual_mimetype = '{}/{}'.format(self.main_type, self.sub_type)
return actual_mimetype == self.expected_mimetype return actual_mimetype == self.expected_mimetype
def make_dangerous(self):
self.log_details['dangerous'] = True
path, filename = os.path.split(self.dst_path)
self.dst_path = os.path.join(path, 'DANGEROUS_{}_DANGEROUS'.format(filename))
def make_unknown(self): class KittenGroomer(KittenGroomerBase):
self.log_details['unknown'] = True
path, filename = os.path.split(self.dst_path)
self.dst_path = os.path.join(path, 'UNKNOWN_{}'.format(filename))
def make_binary(self): def __init__(self, root_src=None, root_dst=None, max_recursive=5):
self.log_details['binary'] = True if root_src is None:
path, filename = os.path.split(self.dst_path) root_src = os.path.join(os.sep, 'media', 'src')
self.dst_path = os.path.join(path, '{}.bin'.format(filename)) if root_dst is None:
root_dst = os.path.join(os.sep, 'media', 'dst')
super(KittenGroomer, self).__init__(root_src, root_dst)
class KittenGroomer(object):
def __init__(self, max_recursive=5):
self.src_root_dir = os.path.join(os.sep, 'media', 'src')
self.dst_root_dir = os.path.join(os.sep, 'media', 'dst')
self.log_root_dir = os.path.join(self.dst_root_dir, 'logs')
self.log_processing = os.path.join(self.log_root_dir, 'processing.log')
self.recursive = 0 self.recursive = 0
self.max_recursive = max_recursive self.max_recursive = max_recursive
# quickSetup(file=self.log_processing)
quickSetup()
self.log_name = log.name('files')
self.cur_file = None
subtypes_apps = [ subtypes_apps = [
(mimes_office, self._office_related), (mimes_office, self._office_related),
(mimes_pdf, self._pdf), (mimes_pdf, self._pdf),
@ -133,30 +103,6 @@ class KittenGroomer(object):
to_return[st] = fct to_return[st] = fct
return to_return return to_return
def _safe_rmtree(self, directory):
if os.path.exists(directory):
shutil.rmtree(directory)
def _safe_remove(self, filepath):
if os.path.exists(filepath):
os.remove(filepath)
def _safe_mkdir(self, directory):
if not os.path.exists(directory):
os.makedirs(directory)
def _safe_copy(self):
''' Create dir if needed '''
try:
dst_path, filename = os.path.split(self.cur_file.dst_path)
self._safe_mkdir(dst_path)
shutil.copy(self.cur_file.src_path, self.cur_file.dst_path)
return True
except Exception as e:
# TODO: Logfile
print(e)
return False
def _list_all_files(self, directory): def _list_all_files(self, directory):
for root, dirs, files in os.walk(directory): for root, dirs, files in os.walk(directory):
for filename in files: for filename in files:
@ -165,16 +111,6 @@ class KittenGroomer(object):
maintype, subtype = mimetype.split('/') maintype, subtype = mimetype.split('/')
yield filepath, maintype, subtype yield filepath, maintype, subtype
def _run_process(self, command_line):
args = shlex.split(command_line)
p = subprocess.Popen(args)
while True:
code = p.poll()
if code is not None:
break
time.sleep(1)
return True
def _print_log(self): def _print_log(self):
tmp_log = self.log_name.fields(**self.cur_file.log_details) tmp_log = self.log_name.fields(**self.cur_file.log_details)
if self.cur_file.log_details.get('dangerous'): if self.cur_file.log_details.get('dangerous'):
@ -278,7 +214,7 @@ class KittenGroomer(object):
extract_command = '{} -p1 x {} -o{} -bd'.format(SEVENZ, self.cur_file.src_path, tmpdir) extract_command = '{} -p1 x {} -o{} -bd'.format(SEVENZ, self.cur_file.src_path, tmpdir)
self._run_process(extract_command) self._run_process(extract_command)
self.recursive += 1 self.recursive += 1
self.processdir(self.cur_file.dst_path, tmpdir) self.processdir(tmpdir, self.cur_file.dst_path)
self.recursive -= 1 self.recursive -= 1
self._safe_rmtree(tmpdir) self._safe_rmtree(tmpdir)
@ -315,11 +251,11 @@ class KittenGroomer(object):
####################### #######################
def processdir(self, dst_dir=None, src_dir=None): def processdir(self, src_dir=None, dst_dir=None):
if dst_dir is None:
dst_dir = self.dst_root_dir
if src_dir is None: if src_dir is None:
src_dir = self.src_root_dir src_dir = self.src_root_dir
if dst_dir is None:
dst_dir = self.dst_root_dir
if self.recursive > 0: if self.recursive > 0:
self._print_log() self._print_log()
@ -344,6 +280,6 @@ class KittenGroomer(object):
self._print_log() self._print_log()
if __name__ == '__main__': if __name__ == '__main__':
kg = KittenGroomer() kg = KittenGroomer('/home/raphael/gits/KittenGroomer/tests/content_img_vfat_norm',
kg.processdir('/home/raphael/gits/KittenGroomer/tests/content_img_vfat_norm_out', '/home/raphael/gits/KittenGroomer/tests/content_img_vfat_norm_out')
'/home/raphael/gits/KittenGroomer/tests/content_img_vfat_norm') kg.processdir()

View File

@ -0,0 +1,62 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
from helpers import FileBase, KittenGroomerBase
# File extensions accepted by each kind of machine.  The comparison strings
# include the leading dot, as returned by os.path.splitext().
printers = ['.STL', '.obj']
cnc = [
    '.nc', '.tap', '.gcode', '.dxf', '.stl', '.obj', '.iges', '.igs',
    '.vrml', '.vrl', '.thing', '.step', '.stp', '.x3d',
]
shopbot = ['.ai', '.svg', '.dxf', '.dwg', '.eps']
omax = ['.ai', '.svg', '.dxf', '.dwg', '.eps', '.omx', '.obj']
epilog_laser = ['.ai', '.svg', '.dxf', '.dwg', '.eps']
metabeam = ['.dxf']
up = ['.upp', '.up3', '.stl', '.obj']
class FilePier9(FileBase):
    """File record that additionally exposes the extension of its source path."""

    def __init__(self, src_path, dst_path):
        """Store both paths via FileBase and derive ``self.extension``.

        ``extension`` is the second element of os.path.splitext(), i.e. it
        keeps the leading dot and is an empty string when there is none.
        """
        super(FilePier9, self).__init__(src_path, dst_path)
        self.extension = os.path.splitext(self.src_path)[1]
class KittenGroomerPier9(KittenGroomerBase):
    """Groomer that copies only files carrying an authorized extension.

    Files whose extension is listed in ``authorized_extensions`` are copied
    from the source tree to the same relative location in the destination
    tree; every other file is left out and logged as a warning.
    """

    def __init__(self, root_src=None, root_dst=None):
        """Set up the groomer.

        :param root_src: source root; defaults to ``/media/src``.
        :param root_dst: destination root; defaults to ``/media/dst``.
        """
        if root_src is None:
            root_src = os.path.join(os.sep, 'media', 'src')
        if root_dst is None:
            root_dst = os.path.join(os.sep, 'media', 'dst')
        super(KittenGroomerPier9, self).__init__(root_src, root_dst)

        # The initial version accepts every file extension of every machine.
        self.authorized_extensions = printers + cnc + shopbot + omax + epilog_laser + metabeam + up

    def _print_log(self):
        """Emit the current file's log line: warning unless it was marked valid."""
        tmp_log = self.log_name.fields(**self.cur_file.log_details)
        if not self.cur_file.log_details.get('valid'):
            tmp_log.warning(self.cur_file.log_string)
        else:
            tmp_log.debug(self.cur_file.log_string)

    def processdir(self, src_dir=None, dst_dir=None):
        """Copy every authorized file below ``src_dir`` into ``dst_dir``.

        :param src_dir: directory to walk; defaults to ``self.src_root_dir``.
        :param dst_dir: directory to copy into; defaults to
            ``self.dst_root_dir``.

        The signature mirrors KittenGroomerBase.processdir(); calling it
        without arguments behaves as before.
        """
        if src_dir is None:
            src_dir = self.src_root_dir
        if dst_dir is None:
            dst_dir = self.dst_root_dir

        # Compare extensions case-insensitively: the lists themselves mix
        # '.STL' and '.stl', and file names may come in either case.
        # Built once per run so later changes to the list are honoured.
        allowed = frozenset(ext.lower() for ext in self.authorized_extensions)

        for srcpath in self._list_all_files(src_dir):
            # relpath() strips only the leading root. str.replace() would
            # rewrite every occurrence of the root inside the path and glue
            # the names together when the root ends with a separator.
            relpath = os.path.relpath(srcpath, src_dir)
            self.log_name.info('Processing {}', relpath)
            self.cur_file = FilePier9(srcpath, os.path.join(dst_dir, relpath))
            if self.cur_file.extension.lower() in allowed:
                self.cur_file.add_log_details('valid', True)
                self.cur_file.log_string = 'Expected extension: ' + self.cur_file.extension
                self._safe_copy()
            else:
                self.cur_file.log_string = 'Bad extension: ' + self.cur_file.extension
            self._print_log()
if __name__ == '__main__':
    # Example run against hard-coded developer paths.
    src_dir = '/home/raphael/gits/KittenGroomer/tests/content_img_vfat_norm'
    dst_dir = '/home/raphael/gits/KittenGroomer/tests/content_img_vfat_norm_out'
    kg = KittenGroomerPier9(src_dir, dst_dir)
    kg.processdir()

110
fs/opt/groomer/helpers.py Normal file
View File

@ -0,0 +1,110 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import shutil
from twiggy import quickSetup, log
import shlex
import subprocess
import time
class KittenGroomerError(Exception):
    """Base exception of the groomer; keeps its text in ``self.message``."""

    def __init__(self, message):
        """Store ``message`` on the instance and pass it on to Exception."""
        self.message = message
        super(KittenGroomerError, self).__init__(message)
class ImplementationRequired(KittenGroomerError):
    """Raised when a method that subclasses must provide has not been overridden."""
class FileBase(object):
    """Source/destination path pair of one file, plus its logging state."""

    def __init__(self, src_path, dst_path):
        """Record both paths and start the log details with the source path."""
        self.src_path = src_path
        self.dst_path = dst_path
        self.log_string = ''
        self.log_details = {'filepath': self.src_path}

    def add_log_details(self, key, value):
        """Set ``key`` to ``value`` in the details attached to the log entry."""
        self.log_details[key] = value

    def _flag_and_rename(self, flag, name_template):
        """Set ``flag`` in the log details and rewrite the destination name.

        ``name_template`` is formatted with the current destination file
        name; the destination directory is kept.
        """
        self.log_details[flag] = True
        directory, basename = os.path.split(self.dst_path)
        self.dst_path = os.path.join(directory, name_template.format(basename))

    def make_dangerous(self):
        """Flag the file as dangerous and wrap its destination name in DANGEROUS_ markers."""
        self._flag_and_rename('dangerous', 'DANGEROUS_{}_DANGEROUS')

    def make_unknown(self):
        """Flag the file as unknown and prefix its destination name with UNKNOWN_."""
        self._flag_and_rename('unknown', 'UNKNOWN_{}')

    def make_binary(self):
        """Flag the file as binary and append .bin to its destination name."""
        self._flag_and_rename('binary', '{}.bin')
class KittenGroomerBase(object):
    """Common plumbing for groomers: root directories, logger and file helpers.

    Subclasses have to override processdir(); _print_log() is a no-op hook
    they may override as well.
    """

    def __init__(self, root_src, root_dst):
        """Remember the roots, derive the log paths and set up the logger.

        :param root_src: root directory of the files to process.
        :param root_dst: root directory receiving the processed files; the
            log paths are derived from it (``<root_dst>/logs/processing.log``).
        """
        self.src_root_dir = root_src
        self.dst_root_dir = root_dst
        self.log_root_dir = os.path.join(self.dst_root_dir, 'logs')
        self.log_processing = os.path.join(self.log_root_dir, 'processing.log')

        # Logging into self.log_processing is not enabled yet:
        # quickSetup(file=self.log_processing)
        quickSetup()
        self.log_name = log.name('files')

        # File object currently being processed; set by the subclasses.
        self.cur_file = None

    # ##### Helpers #####
    def _safe_rmtree(self, directory):
        """Remove ``directory`` recursively if it exists."""
        if os.path.exists(directory):
            shutil.rmtree(directory)

    def _safe_remove(self, filepath):
        """Remove ``filepath`` if it exists."""
        if os.path.exists(filepath):
            os.remove(filepath)

    def _safe_mkdir(self, directory):
        """Create ``directory`` (and missing parents) unless it already exists."""
        if not os.path.exists(directory):
            os.makedirs(directory)

    def _safe_copy(self):
        """Copy the current file to its destination, creating the directory if needed.

        :returns: True on success, False when anything went wrong (the
            exception is printed, not raised).
        """
        try:
            self._safe_mkdir(os.path.dirname(self.cur_file.dst_path))
            shutil.copy(self.cur_file.src_path, self.cur_file.dst_path)
            return True
        except Exception as e:
            # Deliberately best-effort: one failing file must not stop the run.
            # TODO: Logfile
            print(e)
            return False

    def _list_all_files(self, directory):
        """Yield the path of every file found below ``directory``."""
        for root, _dirs, files in os.walk(directory):
            for filename in files:
                yield os.path.join(root, filename)

    def _run_process(self, command_line):
        """Run ``command_line`` and block until the process has exited.

        The command line is split with shlex and executed without a shell.

        :returns: always True; the exit code is not inspected.
        """
        args = shlex.split(command_line)
        p = subprocess.Popen(args)
        # wait() returns as soon as the child exits; polling with a one
        # second sleep delayed every call by up to a second.  No pipes are
        # attached to the child, so wait() cannot block on a full pipe.
        p.wait()
        return True

    def _print_log(self):
        """Hook for subclasses to log the current file; does nothing here."""
        # Not implemented
        pass

    #######################

    def processdir(self, src_dir=None, dst_dir=None):
        """Process a directory tree; must be overridden by subclasses.

        :raises ImplementationRequired: always, in this base class.
        """
        raise ImplementationRequired('You have to implement the result processdir.')