chg: [expansion:convert_markdown_to_pdf] Better support of margins and added installation notes

- Had to introduce hacky code, as wkhtmltopdf could not correctly parse margins
and other options such as --disable-smart-shrinking when passed by pandoc
pull/702/head
Sami Mokaddem 2024-11-18 09:54:12 +01:00
parent e17aad3aeb
commit e8537592d7
No known key found for this signature in database
GPG Key ID: 164C473F627A06FA
1 changed files with 120 additions and 11 deletions

View File

@ -3,38 +3,147 @@
import json
import base64
import pandoc
import random
import string
import subprocess
import os
import shutil
# Human-readable setup instructions; exposed through moduleinfo['references'].
installationNotes = '''
1. Install pandoc for your distribution
2. Install wkhtmltopdf
- Ensure You have install the version with patched qt
- Ensure it supports margin options
- You can check the above by inspecting the extended help `wkhtmltopdf --extended-help`
3. Install mermaid
- `npm install --global @mermaid-js/mermaid-cli`
4. Install the pandoc-mermaid-filter from https://github.com/DavidCruciani/pandoc-mermaid-filter
- Easiest is to install the following:
```bash
pip3 install git+https://github.com/DavidCruciani/pandoc-mermaid-filter
```
'''

misperrors = {'error': 'Error'}
mispattributes = {'input': ['text'], 'output': ['text']}

# Module metadata. Each key appears exactly once: the superseded 'version',
# 'description' and 'references' entries were dead duplicates (in a dict
# literal the last duplicate key silently wins) and have been dropped.
moduleinfo = {
    'version': '0.3',
    'author': 'Sami Mokaddem',
    'description': 'Render the markdown (under GFM) into PDF. Requires pandoc (https://pandoc.org/), wkhtmltopdf (https://wkhtmltopdf.org/) and mermaid dependencies.',
    'module-type': ['expansion'],
    'name': 'Markdown to PDF converter',
    'logo': '',
    'requirements': ['pandoc'],
    'features': '',
    'references': [installationNotes],
    'input': '',
    'output': '',
}

# Names of the user-configurable settings accepted by this module.
moduleconfig = [
    'margin',
]
def randomFilename(length=10):
    """Return a random name made of lowercase ASCII letters and digits.

    The result is used to build filesystem paths, so the characters are
    drawn from the operating system's entropy source rather than from the
    default, seedable generator whose output can be predicted.

    Args:
        length: Number of characters to generate (default 10).

    Returns:
        A string of ``length`` characters taken from ``[a-z0-9]``.
    """
    characters = string.ascii_lowercase + string.digits  # Lowercase letters and digits
    return ''.join(random.SystemRandom().choices(characters, k=length))
def _as_pandoc_document(elt):
    """Wrap *elt* into a ``pandoc.types.Pandoc`` document.

    Accepts a Pandoc document, a Block, an Inline, a MetaBlocks/MetaInlines
    value, or a list of Blocks/Inlines, and promotes it step by step until it
    is a full document.

    Raises:
        TypeError: if *elt* cannot be promoted to a Pandoc document.
    """
    types = pandoc.types

    # wrap/unwrap Inline or MetaInlines into [Inline]
    if isinstance(elt, types.Inline):
        elt = [elt]
    elif isinstance(elt, types.MetaInlines):
        elt = elt[0]

    # wrap [Inline] into a Plain element
    if isinstance(elt, list) and all(isinstance(item, types.Inline) for item in elt):
        elt = types.Plain(elt)

    # wrap/unwrap Block or MetaBlocks into [Block]
    if isinstance(elt, types.Block):
        elt = [elt]
    elif isinstance(elt, types.MetaBlocks):
        elt = elt[0]

    # wrap [Block] into a Pandoc element
    if isinstance(elt, list) and all(isinstance(item, types.Block) for item in elt):
        elt = types.Pandoc(types.Meta({}), elt)

    if not isinstance(elt, types.Pandoc):
        raise TypeError(f"{elt!r} is not a Pandoc, Block or Inline instance.")
    return elt


def convert(markdown, margin='3'):
    """Render GitHub-flavoured markdown to PDF and return it base64-encoded.

    The markdown is parsed with the ``pandoc`` Python package, serialised to
    pandoc's JSON format, and converted by invoking the ``pandoc`` executable
    directly with wkhtmltopdf as PDF engine and the ``pandoc-mermaid``
    filter. The executable is called by hand because, when the same options
    go through ``pandoc.write``, wkhtmltopdf does not receive or parse them
    correctly (margins, ``--disable-smart-shrinking``).

    Args:
        markdown: Markdown source text, read as ``gfm``.
        margin: Value used for the ``margin-left``, ``margin-right``,
            ``margin-top`` and ``margin-bottom`` pandoc variables.

    Returns:
        The generated PDF as a base64-encoded ``str``.

    Raises:
        TypeError: if the parsed document cannot be turned into a Pandoc
            document.
        FileNotFoundError: if pandoc did not produce an output file (a
            failing pandoc run is reported on stdout first).
    """
    import tempfile  # local import: only this function needs it

    doc = _as_pandoc_document(pandoc.read(markdown, format='gfm'))

    # Serialise the document with the JSON layout matching the installed
    # pandoc-types version.
    configuration = pandoc.configure(read=True)
    if pandoc.utils.version_key(configuration["pandoc_types_version"]) < [1, 17]:
        json_ = pandoc.write_json_v1(doc)
    else:
        json_ = pandoc.write_json_v2(doc)

    # mkdtemp creates a fresh, uniquely named directory readable only by the
    # current user, instead of a guessable path that may already exist.
    workdir = tempfile.mkdtemp(prefix='markdown_to_pdf_')
    try:
        input_path = os.path.join(workdir, 'input.json')
        output_path = os.path.join(workdir, 'output')

        # Write parsed file structure to be fed to the converter
        with open(input_path, 'wb') as f:
            f.write(json.dumps(json_).encode("utf-8"))

        command = [
            # Prefer the pandoc found on PATH; keep the historical location
            # as a fallback.
            shutil.which("pandoc") or "/usr/bin/pandoc",
            "-t", "pdf",
            "-o", output_path,
            "--pdf-engine=wkhtmltopdf",
            "-V", f"margin-left={margin}",
            "-V", f"margin-right={margin}",
            "-V", f"margin-top={margin}",
            "-V", f"margin-bottom={margin}",
            "--pdf-engine-opt=--disable-smart-shrinking",
            "--filter=pandoc-mermaid",
            "-f", "json",
            input_path,
        ]

        # Do conversion by manually invoking pandoc. A non-zero exit status
        # is only reported here; if no output was written, the read below
        # raises FileNotFoundError.
        try:
            subprocess.run(command, check=True)
        except subprocess.CalledProcessError as e:
            print(f"Command failed with error: {e}")

        # Read output and return it
        with open(output_path, 'rb') as f:
            converted = f.read()
    finally:
        # Always clean up generated files, even when the conversion failed.
        shutil.rmtree(workdir, ignore_errors=True)

    return base64.b64encode(converted).decode()
def handler(q=False):