Fix plaintext markdown converter

pull/4795/head
Chocobozzz 2022-02-04 10:31:54 +01:00
parent 457c83486e
commit c68e2b2d22
No known key found for this signature in database
GPG Key ID: 583A612D890159BE
7 changed files with 104 additions and 27 deletions

View File

@ -1,6 +1,6 @@
import { Injectable } from '@angular/core'
import { getCustomMarkupSanitizeOptions, getDefaultSanitizeOptions } from '@shared/core-utils/renderer/html'
import { LinkifierService } from './linkifier.service'
import { getCustomMarkupSanitizeOptions, getSanitizeOptions } from '@shared/core-utils/renderer/html'
@Injectable()
export class HtmlRendererService {
@ -30,7 +30,7 @@ export class HtmlRendererService {
const options = additionalAllowedTags.length !== 0
? getCustomMarkupSanitizeOptions(additionalAllowedTags)
: getSanitizeOptions()
: getDefaultSanitizeOptions()
return this.sanitizeHtml(html, options)
}

View File

@ -1,6 +1,6 @@
import express from 'express'
import Feed from 'pfeed'
import { mdToPlainText, toSafeHtml } from '@server/helpers/markdown'
import { mdToOneLinePlainText, toSafeHtml } from '@server/helpers/markdown'
import { getServerActor } from '@server/models/application/application'
import { getCategoryLabel } from '@server/models/video/formatter/video-format-utils'
import { VideoInclude } from '@shared/models'
@ -236,7 +236,7 @@ function initFeed (parameters: {
return new Feed({
title: name,
description: mdToPlainText(description),
description: mdToOneLinePlainText(description),
// updated: TODO: somehowGetLatestUpdate, // optional, default = today
id: webserverUrl,
link: webserverUrl,
@ -299,7 +299,7 @@ function addVideosToFeed (feed, videos: VideoModel[]) {
title: video.name,
id: video.url,
link: WEBSERVER.URL + video.getWatchStaticPath(),
description: mdToPlainText(video.getTruncatedDescription()),
description: mdToOneLinePlainText(video.getTruncatedDescription()),
content: toSafeHtml(video.description),
author: [
{

View File

@ -1,14 +1,14 @@
import { getSanitizeOptions, TEXT_WITH_HTML_RULES } from '@shared/core-utils'
import { getDefaultSanitizeOptions, getTextOnlySanitizeOptions, TEXT_WITH_HTML_RULES } from '@shared/core-utils'
const sanitizeOptions = getSanitizeOptions()
const defaultSanitizeOptions = getDefaultSanitizeOptions()
const textOnlySanitizeOptions = getTextOnlySanitizeOptions()
const sanitizeHtml = require('sanitize-html')
const markdownItEmoji = require('markdown-it-emoji/light')
const MarkdownItClass = require('markdown-it')
const markdownIt = new MarkdownItClass('default', { linkify: true, breaks: true, html: true })
markdownIt.enable(TEXT_WITH_HTML_RULES)
markdownIt.use(markdownItEmoji)
const markdownItWithHTML = new MarkdownItClass('default', { linkify: true, breaks: true, html: true })
const markdownItWithoutHTML = new MarkdownItClass('default', { linkify: true, breaks: true, html: false })
const toSafeHtml = (text: string) => {
if (!text) return ''
@ -17,29 +17,65 @@ const toSafeHtml = (text: string) => {
const textWithLineFeed = text.replace(/<br.?\/?>/g, '\r\n')
// Convert possible markdown (emojis, emphasis and lists) to html
const html = markdownIt.render(textWithLineFeed)
const html = markdownItWithHTML.enable(TEXT_WITH_HTML_RULES)
.use(markdownItEmoji)
.render(textWithLineFeed)
// Convert to safe Html
return sanitizeHtml(html, sanitizeOptions)
return sanitizeHtml(html, defaultSanitizeOptions)
}
const mdToPlainText = (text: string) => {
const mdToOneLinePlainText = (text: string) => {
if (!text) return ''
// Convert possible markdown (emojis, emphasis and lists) to html
const html = markdownIt.render(text)
markdownItWithoutHTML.use(markdownItEmoji)
.use(plainTextPlugin)
.render(text)
// Convert to safe Html
const safeHtml = sanitizeHtml(html, sanitizeOptions)
return safeHtml.replace(/<[^>]+>/g, '')
.replace(/\n$/, '')
.replace(/\n/g, ', ')
return sanitizeHtml(markdownItWithoutHTML.plainText, textOnlySanitizeOptions)
}
// ---------------------------------------------------------------------------
export {
toSafeHtml,
mdToPlainText
mdToOneLinePlainText
}
// ---------------------------------------------------------------------------
// Thanks: https://github.com/wavesheep/markdown-it-plain-text
/**
 * markdown-it plugin that flattens the parsed document into a single line of plain text.
 *
 * It registers a core rule named `plainText`. Each time the markdown-it instance parses a
 * document, the rule walks the token stream and stores the resulting text on
 * `markdownIt.plainText`, with every run of whitespace collapsed into one space.
 *
 * Separators inserted between pieces of text:
 *  - `', '` after a list item
 *  - `' '` after any other closing token (paragraph, strong, ...) and after a line break
 *
 * @param markdownIt the markdown-it instance to extend (receives the `plainText` property)
 */
function plainTextPlugin (markdownIt: any) {
  // Matches the type of closing tokens (`paragraph_close`, `strong_close`, ...)
  const closingTokenRegex = /[a-zA-Z]+_close/

  // Separator to emit before the next piece of text, if any
  let pendingSeparator = ''

  function plainTextRule (state: any) {
    // Every render starts from a clean state, so a previous document cannot leak a
    // leading separator into this one when the plugin is registered only once
    pendingSeparator = ''

    const text = scan(state.tokens)

    markdownIt.plainText = text.replace(/\s+/g, ' ')
  }

  function scan (tokens: any[]) {
    let text = ''

    for (const token of tokens) {
      // Container tokens (inline) carry their text in their children
      if (Array.isArray(token.children)) {
        text += scan(token.children)
        continue
      }

      if (token.type === 'list_item_close') {
        pendingSeparator = ', '
      } else if (closingTokenRegex.test(token.type) || token.type === 'softbreak' || token.type === 'hardbreak') {
        // Line breaks have no content: without a separator the two lines would be glued together
        pendingSeparator = ' '
      } else if (token.content) {
        text += pendingSeparator + token.content

        // The separator has been consumed: do not repeat it before the next text token
        pendingSeparator = ''
      }
    }

    return text
  }

  markdownIt.core.ruler.push('plainText', plainTextRule)
}

View File

@ -12,7 +12,7 @@ import { HttpStatusCode } from '../../shared/models/http/http-error-codes'
import { VideoPlaylistPrivacy, VideoPrivacy } from '../../shared/models/videos'
import { isTestInstance } from '../helpers/core-utils'
import { logger } from '../helpers/logger'
import { mdToPlainText } from '../helpers/markdown'
import { mdToOneLinePlainText } from '../helpers/markdown'
import { CONFIG } from '../initializers/config'
import {
ACCEPT_HEADERS,
@ -103,7 +103,7 @@ class ClientHtml {
res.status(HttpStatusCode.NOT_FOUND_404)
return html
}
const description = mdToPlainText(video.description)
const description = mdToOneLinePlainText(video.description)
let customHtml = ClientHtml.addTitleTag(html, video.name)
customHtml = ClientHtml.addDescriptionTag(customHtml, description)
@ -164,7 +164,7 @@ class ClientHtml {
return html
}
const description = mdToPlainText(videoPlaylist.description)
const description = mdToOneLinePlainText(videoPlaylist.description)
let customHtml = ClientHtml.addTitleTag(html, videoPlaylist.name)
customHtml = ClientHtml.addDescriptionTag(customHtml, description)
@ -263,7 +263,7 @@ class ClientHtml {
return ClientHtml.getIndexHTML(req, res)
}
const description = mdToPlainText(entity.description)
const description = mdToOneLinePlainText(entity.description)
let customHtml = ClientHtml.addTitleTag(html, entity.getDisplayName())
customHtml = ClientHtml.addDescriptionTag(customHtml, description)

View File

@ -1,4 +1,5 @@
import './image'
import './core-utils'
import './comment-model'
import './markdown'
import './request'

View File

@ -0,0 +1,34 @@
/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */
import 'mocha'
import { mdToOneLinePlainText } from '@server/helpers/markdown'
import { expect } from 'chai'
describe('Markdown helpers', function () {

  describe('Plain text', function () {
    // Each entry: test title, markdown given to the converter, expected one-line plain text
    const cases = [
      {
        title: 'Should convert a list to plain text',
        markdown: `* list 1
* list 2
* list 3`,
        expected: 'list 1, list 2, list 3'
      },
      {
        title: 'Should convert a list with indentation to plain text',
        markdown: `Hello:
* list 1
* list 2
* list 3`,
        expected: 'Hello: list 1, list 2, list 3'
      },
      {
        title: 'Should convert HTML to plain text',
        markdown: `**Hello** <strong>coucou</strong>`,
        expected: 'Hello coucou'
      }
    ]

    for (const { title, markdown, expected } of cases) {
      it(title, function () {
        const plainText = mdToOneLinePlainText(markdown)

        expect(plainText).to.equal(expected)
      })
    }
  })
})

View File

@ -1,4 +1,4 @@
export function getSanitizeOptions () {
export function getDefaultSanitizeOptions () {
return {
allowedTags: [ 'a', 'p', 'span', 'br', 'strong', 'em', 'ul', 'ol', 'li' ],
allowedSchemes: [ 'http', 'https' ],
@ -23,8 +23,14 @@ export function getSanitizeOptions () {
}
}
/**
 * Build sanitize options with an empty tag allow-list, i.e. no HTML tag is allowed.
 *
 * @returns a new options object on every call, holding an empty `allowedTags` array
 */
export function getTextOnlySanitizeOptions () {
  const allowedTags: string[] = []

  return { allowedTags }
}
export function getCustomMarkupSanitizeOptions (additionalAllowedTags: string[] = []) {
const base = getSanitizeOptions()
const base = getDefaultSanitizeOptions()
return {
allowedTags: [