chg: [expansion:convert_markdown_to_pdf] Better support of margins and added installation notes

- Had to introduce hacky code, as wkhtmltopdf could not correctly parse margins and other options such as --disable-smart-shrinking when they were passed through pandoc
2024-11-18 09:54:12 +01:00 · 2024-11-18 09:54:12 +01:00 · e8537592d7
parent e17aad3aeb
commit e8537592d7
1 changed files with 120 additions and 11 deletions
--- a/misp_modules/modules/expansion/convert_markdown_to_pdf.py
+++ b/misp_modules/modules/expansion/convert_markdown_to_pdf.py
@ -3,38 +3,147 @@
 import json
 import base64
 import pandoc
+import random
+import string
+import subprocess
+import os
+import shutil
+
+
+installationNotes = '''
+1. Install pandoc for your distribution
+2. Install wkhtmltopdf
+    - Ensure You have install the version with patched qt
+    - Ensure it supports margin options
+    - You can check the above by inspecting the extended help `wkhtmltopdf --extended-help`
+3. Install mermaid
+    - `npm install --global @mermaid-js/mermaid-cli`
+4. Install the pandoc-mermaid-filter from https://github.com/DavidCruciani/pandoc-mermaid-filter
+    - Easiest is to install the following:
+    ```bash
+        pip3 install git+https://github.com/DavidCruciani/pandoc-mermaid-filter
+    ```
+'''

 misperrors = {'error': 'Error'}
 mispattributes = {'input': ['text'], 'output': ['text']}
 moduleinfo = {
-    'version': '0.2',
+    'version': '0.3',
    'author': 'Sami Mokaddem',
-    'description': 'Render the markdown (under GFM) into PDF. Requires pandoc (https://pandoc.org/) and wkhtmltopdf (https://wkhtmltopdf.org/).',
+    'description': 'Render the markdown (under GFM) into PDF. Requires pandoc (https://pandoc.org/), wkhtmltopdf (https://wkhtmltopdf.org/) and mermaid dependencies.',
    'module-type': ['expansion'],
    'name': 'Markdown to PDF converter',
    'logo': '',
    'requirements': ['pandoc'],
    'features': '',
-    'references': [],
+    'references': [installationNotes],
    'input': '',
    'output': '',
 }

 moduleconfig = [
-    'margin',
 ]

+def randomFilename(length=10):
+    characters = string.ascii_lowercase + string.digits  # Lowercase letters and digits
+    return ''.join(random.choices(characters, k=length))

 def convert(markdown, margin='3'):
    doc = pandoc.read(markdown, format='gfm')
-    options = [
-        '--pdf-engine=wkhtmltopdf',
-        f'-V margin-left={margin}',
-        f'-V margin-right={margin}',
-        f'-V margin-top={margin}',
-        f'-V margin-bottom={margin}',
+
+    elt = doc
+
+    # wrap/unwrap Inline or MetaInlines into [Inline]
+    if isinstance(elt, pandoc.types.Inline):
+        inline = elt
+        elt = [inline]
+    elif isinstance(elt, pandoc.types.MetaInlines):
+        meta_inlines = elt
+        elt = meta_inlines[0]
+
+    # wrap [Inline] into a Plain element
+    if isinstance(elt, list) and all(isinstance(elt_, pandoc.types.Inline) for elt_ in elt):
+        inlines = elt
+        elt = pandoc.types.Plain(inlines)
+
+    # wrap/unwrap Block or MetaBlocks into [Block]
+    if isinstance(elt, pandoc.types.Block):
+        block = elt
+        elt = [block]
+    elif isinstance(elt, pandoc.types.MetaBlocks):
+        meta_blocks = elt
+        elt = meta_blocks[0]
+
+    # wrap [Block] into a Pandoc element
+    if isinstance(elt, list) and all(isinstance(elt_, pandoc.types.Block) for elt_ in elt):
+        blocks = elt
+        elt = pandoc.types.Pandoc(pandoc.types.Meta({}), blocks)
+
+    if not isinstance(elt, pandoc.types.Pandoc):
+        raise TypeError(f"{elt!r} is not a Pandoc, Block or Inline instance.")
+
+    doc = elt
+
+    # options = [
+    #     '--pdf-engine=wkhtmltopdf',
+    #     f'-V margin-left={margin}',
+    #     f'-V margin-right={margin}',
+    #     f'-V margin-top={margin}',
+    #     f'-V margin-bottom={margin}',
+    #     '--pdf-engine-opt="--disable-smart-shrinking"',
+    # ]
+    randomFn = randomFilename()
+    command = [
+        "/usr/bin/pandoc",
+        "-t", "pdf",
+        "-o", f"/tmp/{randomFn}/output",
+        "--pdf-engine=wkhtmltopdf",
+        "-V", f"margin-left={margin}",
+        "-V", f"margin-right={margin}",
+        "-V", f"margin-top={margin}",
+        "-V", f"margin-bottom={margin}",
+        "--pdf-engine-opt=--disable-smart-shrinking",
+        "--filter=pandoc-mermaid",
+        "-f", "json",
+        f"/tmp/{randomFn}/input.js"
    ]
-    converted = pandoc.write(doc, format='pdf', options=options)
+    # try:
+    #     # For some reason, the options are either not passed correctly by pandoc or not parsed correctly by wkhtmltopdf.
+    #     # converted = pandoc.write(doc, format='pdf', options=options)
+    # except Exception as e:
+    #     print(e)
+
+    os.makedirs(f'/tmp/{randomFn}', exist_ok=True)
+    # Write parsed file structure to be fed to the converter
+    with open(f'/tmp/{randomFn}/input.js', 'bw') as f:
+        configuration = pandoc.configure(read=True)
+        if pandoc.utils.version_key(configuration["pandoc_types_version"]) < [1, 17]:
+            json_ = pandoc.write_json_v1(doc)
+        else:
+            json_ = pandoc.write_json_v2(doc)
+        json_str = json.dumps(json_)
+        f.write(json_str.encode("utf-8"))
+
+    # Do conversion by manually invoking pandoc
+    try:
+        subprocess.run(command, check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Command failed with error: {e}")
+
+    # Read the output and return it
+    with open(f'/tmp/{randomFn}/output', 'br') as f:
+        converted = f.read()
+
+    # Clean up generated files
+    folderPath = f'/tmp/{randomFn}'
+    try:
+        shutil.rmtree(folderPath)
+        print(f"Folder '{folderPath}' deleted successfully.")
+    except FileNotFoundError:
+        print(f"Folder '{folderPath}' does not exist.")
+    except Exception as e:
+        print(f"Error deleting folder '{folderPath}': {e}")
+
    return base64.b64encode(converted).decode()

 def handler(q=False):