Merge pull request #5244 from akissinger/katex

LaTeX rendering in element-web using KaTeX
pull/21833/head
J. Ryan Stinnett 2020-11-27 11:55:26 +00:00 committed by GitHub
commit 826466179a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 107 additions and 5 deletions

View File

@ -76,6 +76,8 @@
"highlight.js": "^10.1.2",
"html-entities": "^1.3.1",
"is-ip": "^2.0.0",
"katex": "^0.12.0",
"cheerio": "^1.0.0-rc.3",
"linkifyjs": "^2.1.9",
"lodash": "^4.17.19",
"matrix-js-sdk": "github:matrix-org/matrix-js-sdk#develop",

View File

@ -27,9 +27,12 @@ import _linkifyString from 'linkifyjs/string';
import classNames from 'classnames';
import EMOJIBASE_REGEX from 'emojibase-regex';
import url from 'url';
import katex from 'katex';
import { AllHtmlEntities } from 'html-entities';
import SettingsStore from './settings/SettingsStore';
import cheerio from 'cheerio';
import {MatrixClientPeg} from './MatrixClientPeg';
import SettingsStore from './settings/SettingsStore';
import {tryTransformPermalinkToLocalHref} from "./utils/permalinks/Permalinks";
import {SHORTCODE_TO_EMOJI, getEmojiFromUnicode} from "./emoji";
import ReplyThread from "./components/views/elements/ReplyThread";
@ -240,7 +243,8 @@ const sanitizeHtmlParams: IExtendedSanitizeOptions = {
allowedAttributes: {
// custom ones first:
font: ['color', 'data-mx-bg-color', 'data-mx-color', 'style'], // custom to matrix
span: ['data-mx-bg-color', 'data-mx-color', 'data-mx-spoiler', 'style'], // custom to matrix
span: ['data-mx-maths', 'data-mx-bg-color', 'data-mx-color', 'data-mx-spoiler', 'style'], // custom to matrix
div: ['data-mx-maths'],
a: ['href', 'name', 'target', 'rel'], // remote target: custom to matrix
img: ['src', 'width', 'height', 'alt', 'title'],
ol: ['start'],
@ -414,6 +418,21 @@ export function bodyToHtml(content: IContent, highlights: string[], opts: IOpts
if (isHtmlMessage) {
isDisplayedWithHtml = true;
safeBody = sanitizeHtml(formattedBody, sanitizeParams);
if (SettingsStore.getValue("feature_latex_maths")) {
const phtml = cheerio.load(safeBody,
{ _useHtmlParser2: true, decodeEntities: false })
phtml('div, span[data-mx-maths!=""]').replaceWith(function(i, e) {
return katex.renderToString(
AllHtmlEntities.decode(phtml(e).attr('data-mx-maths')),
{
throwOnError: false,
displayMode: e.name == 'div',
output: "htmlAndMathml",
});
});
safeBody = phtml.html();
}
}
} finally {
delete sanitizeParams.textFilter;
@ -515,7 +534,6 @@ export function checkBlockNode(node: Node) {
case "H6":
case "PRE":
case "BLOCKQUOTE":
case "DIV":
case "P":
case "UL":
case "OL":
@ -528,6 +546,9 @@ export function checkBlockNode(node: Node) {
case "TH":
case "TD":
return true;
case "DIV":
// don't treat math nodes as block nodes for deserializing
return !(node as HTMLElement).hasAttribute("data-mx-maths");
default:
return false;
}

View File

@ -23,6 +23,11 @@ const ALLOWED_HTML_TAGS = ['sub', 'sup', 'del', 'u'];
const TEXT_NODES = ['text', 'softbreak', 'linebreak', 'paragraph', 'document'];
function is_allowed_html_tag(node) {
if (node.literal != null &&
node.literal.match('^<((div|span) data-mx-maths="[^"]*"|\/(div|span))>$') != null) {
return true;
}
// The regex below won't match tags that carry attributes, but the tags in
// ALLOWED_HTML_TAGS (sub, sup, del, u) are only allowed in their bare form anyway.
const matches = /^<\/?(.*)>$/.exec(node.literal);
@ -30,6 +35,7 @@ function is_allowed_html_tag(node) {
const tag = matches[1];
return ALLOWED_HTML_TAGS.indexOf(tag) > -1;
}
return false;
}

View File

@ -21,6 +21,7 @@ import { walkDOMDepthFirst } from "./dom";
import { checkBlockNode } from "../HtmlUtils";
import { getPrimaryPermalinkEntity } from "../utils/permalinks/Permalinks";
import { PartCreator } from "./parts";
import SdkConfig from "../SdkConfig";
function parseAtRoomMentions(text: string, partCreator: PartCreator) {
const ATROOM = "@room";
@ -130,6 +131,23 @@ function parseElement(n: HTMLElement, partCreator: PartCreator, lastNode: HTMLEl
}
break;
}
case "DIV":
case "SPAN": {
// math nodes are translated back into delimited latex strings
if (n.hasAttribute("data-mx-maths")) {
const delimLeft = (n.nodeName == "SPAN") ?
(SdkConfig.get()['latex_maths_delims'] || {})['inline_left'] || "$" :
(SdkConfig.get()['latex_maths_delims'] || {})['display_left'] || "$$";
const delimRight = (n.nodeName == "SPAN") ?
(SdkConfig.get()['latex_maths_delims'] || {})['inline_right'] || "$" :
(SdkConfig.get()['latex_maths_delims'] || {})['display_right'] || "$$";
const tex = n.getAttribute("data-mx-maths");
return partCreator.plain(delimLeft + tex + delimRight);
} else if (!checkDescendInto(n)) {
return partCreator.plain(n.textContent);
}
break;
}
case "OL":
state.listIndex.push((<HTMLOListElement>n).start || 1);
/* falls through */

View File

@ -18,6 +18,10 @@ limitations under the License.
import Markdown from '../Markdown';
import {makeGenericPermalink} from "../utils/permalinks/Permalinks";
import EditorModel from "./model";
import { AllHtmlEntities } from 'html-entities';
import SettingsStore from '../settings/SettingsStore';
import SdkConfig from '../SdkConfig';
import cheerio from 'cheerio';
export function mdSerialize(model: EditorModel) {
return model.parts.reduce((html, part) => {
@ -38,10 +42,43 @@ export function mdSerialize(model: EditorModel) {
}
export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} = {}) {
const md = mdSerialize(model);
let md = mdSerialize(model);
if (SettingsStore.getValue("feature_latex_maths")) {
const displayPattern = (SdkConfig.get()['latex_maths_delims'] || {})['display_pattern'] ||
"\\$\\$(([^$]|\\\\\\$)*)\\$\\$";
const inlinePattern = (SdkConfig.get()['latex_maths_delims'] || {})['inline_pattern'] ||
"\\$(([^$]|\\\\\\$)*)\\$";
md = md.replace(RegExp(displayPattern, "gm"), function(m, p1) {
const p1e = AllHtmlEntities.encode(p1);
return `<div data-mx-maths="${p1e}">\n\n</div>\n\n`;
});
md = md.replace(RegExp(inlinePattern, "gm"), function(m, p1) {
const p1e = AllHtmlEntities.encode(p1);
return `<span data-mx-maths="${p1e}"></span>`;
});
// make sure div tags always start on a new line, otherwise it will confuse
// the markdown parser
md = md.replace(/(.)<div/g, function(m, p1) { return `${p1}\n<div`; });
}
const parser = new Markdown(md);
if (!parser.isPlainText() || forceHTML) {
return parser.toHTML();
// feed Markdown output to HTML parser
const phtml = cheerio.load(parser.toHTML(),
{ _useHtmlParser2: true, decodeEntities: false })
// add fallback output for latex math, which should not be interpreted as markdown
phtml('div, span').each(function(i, e) {
const tex = phtml(e).attr('data-mx-maths')
if (tex) {
phtml(e).html(`<code>${tex}</code>`)
}
});
return phtml.html();
}
// ensure removal of escape backslashes in non-Markdown messages
if (md.indexOf("\\") > -1) {

View File

@ -755,6 +755,7 @@
"%(senderName)s: %(reaction)s": "%(senderName)s: %(reaction)s",
"%(senderName)s: %(stickerName)s": "%(senderName)s: %(stickerName)s",
"Change notification settings": "Change notification settings",
"Render LaTeX maths in messages": "Render LaTeX maths in messages",
"Communities v2 prototypes. Requires compatible homeserver. Highly experimental - use with caution.": "Communities v2 prototypes. Requires compatible homeserver. Highly experimental - use with caution.",
"New spinner design": "New spinner design",
"Message Pinning": "Message Pinning",

View File

@ -117,6 +117,12 @@ export interface ISetting {
}
export const SETTINGS: {[setting: string]: ISetting} = {
"feature_latex_maths": {
isFeature: true,
displayName: _td("Render LaTeX maths in messages"),
supportedLevels: LEVELS_FEATURE,
default: false,
},
"feature_communities_v2_prototypes": {
isFeature: true,
displayName: _td(

View File

@ -36,6 +36,7 @@ describe("<TextualBody />", () => {
MatrixClientPeg.matrixClient = {
getRoom: () => mkStubRoom("room_id"),
getAccountData: () => undefined,
isGuest: () => false,
};
const ev = mkEvent({
@ -59,6 +60,7 @@ describe("<TextualBody />", () => {
MatrixClientPeg.matrixClient = {
getRoom: () => mkStubRoom("room_id"),
getAccountData: () => undefined,
isGuest: () => false,
};
const ev = mkEvent({
@ -83,6 +85,7 @@ describe("<TextualBody />", () => {
MatrixClientPeg.matrixClient = {
getRoom: () => mkStubRoom("room_id"),
getAccountData: () => undefined,
isGuest: () => false,
};
});
@ -135,6 +138,7 @@ describe("<TextualBody />", () => {
getHomeserverUrl: () => "https://my_server/",
on: () => undefined,
removeListener: () => undefined,
isGuest: () => false,
};
});

View File

@ -6206,6 +6206,13 @@ jsx-ast-utils@^2.4.1:
array-includes "^3.1.1"
object.assign "^4.1.0"
katex@^0.12.0:
version "0.12.0"
resolved "https://registry.yarnpkg.com/katex/-/katex-0.12.0.tgz#2fb1c665dbd2b043edcf8a1f5c555f46beaa0cb9"
integrity sha512-y+8btoc/CK70XqcHqjxiGWBOeIL8upbS0peTPXTvgrh21n1RiWWcIpSWM+4uXq+IAgNh9YYQWdc7LVDPDAEEAg==
dependencies:
commander "^2.19.0"
kind-of@^3.0.2, kind-of@^3.0.3, kind-of@^3.2.0:
version "3.2.2"
resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-3.2.2.tgz#31ea21a734bab9bbb0f32466d893aea51e4a3c64"