# Reconstructed from the whitespace-collapsed patch e8537592 (Sami Mokaddem,
# 2024-11-18): "chg: [expansion:convert_markdown_to_pdf] Better support of
# margins and added installation notes".
import base64
import json
import os
import random
import shutil
import string
import subprocess
import tempfile

import pandoc


installationNotes = '''
1. Install pandoc for your distribution
2. Install wkhtmltopdf
    - Ensure You have install the version with patched qt
    - Ensure it supports margin options
    - You can check the above by inspecting the extended help `wkhtmltopdf --extended-help`
3. Install mermaid
    - `npm install --global @mermaid-js/mermaid-cli`
4. Install the pandoc-mermaid-filter from https://github.com/DavidCruciani/pandoc-mermaid-filter
    - Easiest is to install the following:
    ```bash
    pip3 install git+https://github.com/DavidCruciani/pandoc-mermaid-filter
    ```
'''

misperrors = {'error': 'Error'}
mispattributes = {'input': ['text'], 'output': ['text']}
moduleinfo = {
    'version': '0.3',
    'author': 'Sami Mokaddem',
    'description': 'Render the markdown (under GFM) into PDF. Requires pandoc (https://pandoc.org/), wkhtmltopdf (https://wkhtmltopdf.org/) and mermaid dependencies.',
    'module-type': ['expansion'],
    'name': 'Markdown to PDF converter',
    'logo': '',
    'requirements': ['pandoc'],
    'features': '',
    'references': [installationNotes],
    'input': '',
    'output': '',
}

moduleconfig = [
]

# Location the original code invoked unconditionally; still preferred when present.
_DEFAULT_PANDOC = "/usr/bin/pandoc"


def randomFilename(length=10):
    """Return a random name of `length` lowercase letters and digits.

    No longer used by `convert` (which now relies on `tempfile.mkdtemp`), but
    kept so that anything importing it keeps working. Not suitable for
    security-sensitive names: it draws from the `random` module.
    """
    characters = string.ascii_lowercase + string.digits  # Lowercase letters and digits
    return ''.join(random.choices(characters, k=length))


def _pandoc_executable():
    """Return the pandoc binary to run: the historical path, else a PATH lookup."""
    if os.path.isfile(_DEFAULT_PANDOC):
        return _DEFAULT_PANDOC
    # Falling back to the default keeps the original failure mode
    # (FileNotFoundError from subprocess) when pandoc is not installed at all.
    return shutil.which("pandoc") or _DEFAULT_PANDOC


def _as_pandoc_document(elt):
    """Normalise an Inline/Block (or a list / Meta wrapper of them) into a Pandoc document.

    Raises:
        TypeError: if `elt` cannot be turned into a `pandoc.types.Pandoc`.
    """
    # wrap/unwrap Inline or MetaInlines into [Inline]
    if isinstance(elt, pandoc.types.Inline):
        elt = [elt]
    elif isinstance(elt, pandoc.types.MetaInlines):
        elt = elt[0]

    # wrap [Inline] into a Plain element
    if isinstance(elt, list) and all(isinstance(item, pandoc.types.Inline) for item in elt):
        elt = pandoc.types.Plain(elt)

    # wrap/unwrap Block or MetaBlocks into [Block]
    if isinstance(elt, pandoc.types.Block):
        elt = [elt]
    elif isinstance(elt, pandoc.types.MetaBlocks):
        elt = elt[0]

    # wrap [Block] into a Pandoc element
    if isinstance(elt, list) and all(isinstance(item, pandoc.types.Block) for item in elt):
        elt = pandoc.types.Pandoc(pandoc.types.Meta({}), elt)

    if not isinstance(elt, pandoc.types.Pandoc):
        raise TypeError(f"{elt!r} is not a Pandoc, Block or Inline instance.")
    return elt


def _to_pandoc_json(doc):
    """Serialise `doc` with the JSON writer matching the installed pandoc-types version."""
    configuration = pandoc.configure(read=True)
    if pandoc.utils.version_key(configuration["pandoc_types_version"]) < [1, 17]:
        return pandoc.write_json_v1(doc)
    return pandoc.write_json_v2(doc)


def convert(markdown, margin='3'):
    """Render GFM `markdown` to PDF and return it base64-encoded.

    pandoc is invoked as a subprocess instead of through `pandoc.write`
    because the margin variables and `--disable-smart-shrinking` were not
    passed to / parsed by wkhtmltopdf correctly when given as `pandoc.write`
    options.

    Args:
        markdown: Markdown source, parsed as GitHub-flavoured markdown.
        margin: Value used for all four `margin-*` pandoc variables.

    Returns:
        The generated PDF as a base64 string.

    Raises:
        TypeError: if the parsed document cannot be normalised.
        FileNotFoundError: if the pandoc binary is missing, or pandoc failed
            and produced no output file.
    """
    doc = _as_pandoc_document(pandoc.read(markdown, format='gfm'))
    json_bytes = json.dumps(_to_pandoc_json(doc)).encode("utf-8")

    # mkdtemp creates a fresh, private (0700) directory, unlike a guessable
    # /tmp/<random> path created with exist_ok=True.
    work_dir = tempfile.mkdtemp(prefix='misp-md2pdf-')
    input_path = os.path.join(work_dir, 'input.js')
    output_path = os.path.join(work_dir, 'output')
    command = [
        _pandoc_executable(),
        "-t", "pdf",
        "-o", output_path,
        "--pdf-engine=wkhtmltopdf",
        "-V", f"margin-left={margin}",
        "-V", f"margin-right={margin}",
        "-V", f"margin-top={margin}",
        "-V", f"margin-bottom={margin}",
        "--pdf-engine-opt=--disable-smart-shrinking",
        "--filter=pandoc-mermaid",
        "-f", "json",
        input_path,
    ]

    try:
        # Write parsed file structure to be fed to the converter
        with open(input_path, 'wb') as f:
            f.write(json_bytes)

        # Do conversion by manually invoking pandoc
        try:
            subprocess.run(command, check=True)
        except subprocess.CalledProcessError as e:
            # Best effort, as before: report and still try to read the output.
            print(f"Command failed with error: {e}")

        with open(output_path, 'rb') as f:
            converted = f.read()
    finally:
        # Always clean up, including when pandoc failed or was not found.
        try:
            shutil.rmtree(work_dir)
            print(f"Folder '{work_dir}' deleted successfully.")
        except FileNotFoundError:
            print(f"Folder '{work_dir}' does not exist.")
        except Exception as e:
            print(f"Error deleting folder '{work_dir}': {e}")

    return base64.b64encode(converted).decode()


# NOTE(review): the patch context ends at the signature line
# `def handler(q=False):`; its body is not part of this excerpt and is
# therefore not reproduced here.