diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..19c720e --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include kittengroomer/data/* diff --git a/README.rst b/README.rst new file mode 120000 index 0000000..42061c0 --- /dev/null +++ b/README.rst @@ -0,0 +1 @@ +README.md \ No newline at end of file diff --git a/bin/generic.py b/bin/generic.py index 45cef44..5c3d005 100644 --- a/bin/generic.py +++ b/bin/generic.py @@ -19,7 +19,7 @@ SEVENZ = '/usr/bin/7z' # Prepare application/ mimes_office = ['msword', 'vnd.openxmlformats-officedocument.', 'vnd.ms-', 'vnd.oasis.opendocument'] -mimes_pdf = ['pdf'] +mimes_pdf = ['pdf', 'postscript'] mimes_xml = ['xml'] mimes_ms = ['x-dosexec'] mimes_compressed = ['zip', 'x-rar', 'x-bzip2', 'x-lzip', 'x-lzma', 'x-lzop', @@ -49,8 +49,12 @@ class File(FileBase): mimetype = magic.from_file(src_path, mime=True).decode("utf-8") self.main_type, self.sub_type = mimetype.split('/') a, self.extension = os.path.splitext(src_path) + self.is_recursive = False self.log_details.update({'maintype': self.main_type, 'subtype': self.sub_type, 'extension': self.extension}) + # If the mimetype matches as text/*, it will be sent to LibreOffice, no need to cross check the mime/ext + if self.main_type == 'text': + return # Check correlation known extension => actual mime type if propertype.get(self.extension) is not None: @@ -77,8 +81,6 @@ class File(FileBase): # there are no known extensions associated to this mimetype. 
pass - self.is_recursive = False - class KittenGroomer(KittenGroomerBase): @@ -253,9 +255,13 @@ class KittenGroomer(KittenGroomerBase): tmpdir = os.path.join(dst_dir, 'temp') tmppath = os.path.join(tmpdir, filename) self._safe_mkdir(tmpdir) - gs_command = '{} -dPDFA -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -sOutputFile={} {}'.format( - GS, tmppath, self.cur_file.src_path) + # The magic comes from here: http://svn.ghostscript.com/ghostscript/trunk/gs/doc/Ps2pdf.htm#PDFA + curdir = os.getcwd() + os.chdir(self.ressources_path) + gs_command = '{} -dPDFA -dBATCH -dNOPAUSE -dNOOUTERSAVE -sProcessColorModel=DeviceCMYK -sDEVICE=pdfwrite -sPDFACompatibilityPolicy=1 -sOutputFile={} PDFA_def.ps {}'.format( + GS, os.path.join(curdir, tmppath), os.path.join(curdir, self.cur_file.src_path)) self._run_process(gs_command) + os.chdir(curdir) self._pdfa(tmppath) self._safe_rmtree(tmpdir) diff --git a/kittengroomer/data/PDFA_def.ps b/kittengroomer/data/PDFA_def.ps new file mode 100644 index 0000000..f0ff0d1 --- /dev/null +++ b/kittengroomer/data/PDFA_def.ps @@ -0,0 +1,40 @@ +%! +% This is a sample prefix file for creating a PDF/A document. +% Feel free to modify entries marked with "Customize". +% This assumes an ICC profile to reside in the file (srgb.icc), +% unless the user modifies the corresponding line below. 
+ +% Define entries in the document Info dictionary : +/ICCProfile (srgb.icc) % Customise +def + +[ /Title (Title) % Customise + /DOCINFO pdfmark + +% Define an ICC profile : + +[/_objdef {icc_PDFA} /type /stream /OBJ pdfmark +[{icc_PDFA} +<< + /N currentpagedevice /ProcessColorModel known { + currentpagedevice /ProcessColorModel get dup /DeviceGray eq + {pop 1} { + /DeviceRGB eq + {3}{4} ifelse + } ifelse + } { + (ERROR, unable to determine ProcessColorModel) == flush + } ifelse +>> /PUT pdfmark +[{icc_PDFA} ICCProfile (r) file /PUT pdfmark + +% Define the output intent dictionary : + +[/_objdef {OutputIntent_PDFA} /type /dict /OBJ pdfmark +[{OutputIntent_PDFA} << + /Type /OutputIntent % Must be so (the standard requires). + /S /GTS_PDFA1 % Must be so (the standard requires). + /DestOutputProfile {icc_PDFA} % Must be so (see above). + /OutputConditionIdentifier (sRGB) % Customize +>> /PUT pdfmark +[{Catalog} <</OutputIntents [ {OutputIntent_PDFA} ]>> /PUT pdfmark diff --git a/kittengroomer/data/srgb.icc b/kittengroomer/data/srgb.icc new file mode 100644 index 0000000..627e8fe Binary files /dev/null and b/kittengroomer/data/srgb.icc differ diff --git a/kittengroomer/helpers.py b/kittengroomer/helpers.py index 893be37..85aec0e 100644 --- a/kittengroomer/helpers.py +++ b/kittengroomer/helpers.py @@ -94,6 +94,8 @@ class KittenGroomerBase(object): quickSetup(file=self.log_processing) self.log_name = log.name('files') + self.ressources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') + os.environ["PATH"] += os.pathsep + self.ressources_path self.cur_file = None @@ -113,12 +115,16 @@ class KittenGroomerBase(object): if not os.path.exists(directory): os.makedirs(directory) - def _safe_copy(self): + def _safe_copy(self, src=None, dst=None): ''' Copy a file and create directory if needed ''' + if src is None: + src = self.cur_file.src_path + if dst is None: + dst = self.cur_file.dst_path try: - dst_path, filename = os.path.split(self.cur_file.dst_path) + dst_path, filename = 
os.path.split(dst) self._safe_mkdir(dst_path) - shutil.copy(self.cur_file.src_path, self.cur_file.dst_path) + shutil.copy(src, dst) return True except Exception as e: # TODO: Logfile diff --git a/setup.py b/setup.py index d5b9c9b..010a374 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,8 @@ setup( description='Standalone CIRCLean/KittenGroomer code.', packages=['kittengroomer'], scripts=['bin/generic.py', 'bin/pier9.py'], + include_package_data=True, + package_data={'kittengroomer': ['data/PDFA_def.ps', 'data/srgb.icc']}, classifiers=[ 'License :: OSI Approved :: BSD License', 'Development Status :: 5 - Production/Stable',