Use LaTeX delimiters by default, add /tex command

Since parsing for $'s as maths delimiters is tricky, switch the default to \(...\) for inline and \[...\] for display maths as it is used in LaTeX. Add /tex command to explicitly parse in TeX mode, which uses $...$ for inline and $$...$$ for display maths.

Signed-off-by: Sven Mäder <maeder@phys.ethz.ch>
2020-12-20 23:14:56 +01:00 · 2020-12-20 23:14:56 +01:00 · 78b3f50bfd
parent fa02630c4e
commit 78b3f50bfd
4 changed files with 99 additions and 26 deletions
--- a/src/SlashCommands.tsx
+++ b/src/SlashCommands.tsx
@ -48,6 +48,7 @@ import SettingsStore from "./settings/SettingsStore";
 import {UIFeature} from "./settings/UIFeature";
 import {CHAT_EFFECTS} from "./effects"
 import CallHandler from "./CallHandler";
+import {markdownSerializeIfNeeded} from './editor/serialize';

 // XXX: workaround for https://github.com/microsoft/TypeScript/issues/31816
 interface HTMLInputEvent extends Event {
@ -223,6 +224,23 @@ export const Commands = [
        },
        category: CommandCategories.messages,
    }),
+    new Command({
+        command: 'tex',
+        args: '<message>',
+        description: _td('Sends a message in TeX mode, using $ and $$ delimiters for maths'),
+        runFn: function(roomId, args) {
+            if (SettingsStore.getValue("feature_latex_maths")) {
+                if (args) {
+                    let html = markdownSerializeIfNeeded(args, {forceHTML: false}, {forceTEX: true});
+                    return success(MatrixClientPeg.get().sendHtmlMessage(roomId, args, html));
+                }
+                return reject(this.getUsage());
+            } else {
+                return reject("Render LaTeX maths in messages needs to be enabled in Labs");
+            }
+        },
+        category: CommandCategories.messages,
+    }),
    new Command({
        command: 'ddg',
        args: '<query>',
--- a/src/editor/deserialize.ts
+++ b/src/editor/deserialize.ts
@ -136,11 +136,11 @@ function parseElement(n: HTMLElement, partCreator: PartCreator, lastNode: HTMLEl
            // math nodes are translated back into delimited latex strings
            if (n.hasAttribute("data-mx-maths")) {
                const delimLeft = (n.nodeName == "SPAN") ?
-                    (SdkConfig.get()['latex_maths_delims'] || {})['inline_left'] || "$" :
-                    (SdkConfig.get()['latex_maths_delims'] || {})['display_left'] || "$$";
+                    (SdkConfig.get()['latex_maths_delims'] || {})['inline_left'] || "\\(" :
+                    (SdkConfig.get()['latex_maths_delims'] || {})['display_left'] || "\\[";
                const delimRight = (n.nodeName == "SPAN") ?
-                    (SdkConfig.get()['latex_maths_delims'] || {})['inline_right'] || "$" :
-                    (SdkConfig.get()['latex_maths_delims'] || {})['display_right'] || "$$";
+                    (SdkConfig.get()['latex_maths_delims'] || {})['inline_right'] || "\\)" :
+                    (SdkConfig.get()['latex_maths_delims'] || {})['display_right'] || "\\]";
                const tex = n.getAttribute("data-mx-maths");
                return partCreator.plain(delimLeft + tex + delimRight);
            } else if (!checkDescendInto(n)) {
--- a/src/editor/serialize.ts
+++ b/src/editor/serialize.ts
@ -41,24 +41,57 @@ export function mdSerialize(model: EditorModel) {
    }, "");
 }

-export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} = {}) {
-    let md = mdSerialize(model);
+export function markdownSerializeIfNeeded(md: string, {forceHTML = false} = {}, {forceTEX = false} = {}) {
+    // copy of raw input to remove unwanted math later
+    const orig = md;

    if (SettingsStore.getValue("feature_latex_maths")) {
-        const displayPattern = (SdkConfig.get()['latex_maths_delims'] || {})['display_pattern'] ||
-            "\\$\\$(([^$]|\\\\\\$)*)\\$\\$";
-        const inlinePattern = (SdkConfig.get()['latex_maths_delims'] || {})['inline_pattern'] ||
-            "\\$(([^$]|\\\\\\$)*)\\$";
+        if (forceTEX) {
+            // detect math with tex delimiters, inline: $...$, display $$...$$
+            // negative lookbehinds would be preferable, but they are not supported in all major browsers:
+            // const displayPattern = "^(?<!\\\\)\\$\\$(?![ \\t])(([^$]|\\\\\\$)+?)\\$\\$$";
+            // const inlinePattern = "(?:^|\\s)(?<!\\\\)\\$(?!\\s)(([^$]|\\\\\\$)+?)(?<!\\\\|\\s)\\$";
+
+            // conditions for display math detection ($$...$$):
+            // - left delimiter ($$) is not escaped by a backslash
+            // - pattern starts at the beginning of a line
+            // - left delimiter is not followed by a space or tab character
+            // - pattern ends at the end of a line
+            const displayPattern = "^(?!\\\\)\\$\\$(?![ \\t])(([^$]|\\\\\\$)+?)\\$\\$$";
+
+            // conditions for inline math detection ($...$):
+            // - left and right delimiters ($) are not escaped by backslashes
+            // - pattern starts at the beginning of a line or follows a whitespace character
+            // - left delimiter is not followed by a whitespace character
+            // - right delimiter is not preceded by a whitespace character
+            const inlinePattern = "(^|\\s)(?!\\\\)\\$(?!\\s)(([^$]|\\\\\\$)*[^\\\\\\s\\$](?:\\\\\\$)?)\\$";

            md = md.replace(RegExp(displayPattern, "gm"), function(m, p1) {
                const p1e = AllHtmlEntities.encode(p1);
                return `<div data-mx-maths="${p1e}">\n\n</div>\n\n`;
            });

-        md = md.replace(RegExp(inlinePattern, "gm"), function(m, p1) {
-            const p1e = AllHtmlEntities.encode(p1);
-            return `<span data-mx-maths="${p1e}"></span>`;
+            md = md.replace(RegExp(inlinePattern, "gm"), function(m, p1, p2) {
+                const p2e = AllHtmlEntities.encode(p2);
+                return `${p1}<span data-mx-maths="${p2e}"></span>`;
            });
+        } else {
+            // detect math with latex delimiters, inline: \(...\), display \[...\]
+            const displayPattern = (SdkConfig.get()['latex_maths_delims'] || {})['display_pattern'] ||
+                "^\\\\\\[(.*?)\\\\\\]$";
+            const inlinePattern = (SdkConfig.get()['latex_maths_delims'] || {})['inline_pattern'] ||
+                "(^|\\s)\\\\\\((.*?)\\\\\\)";
+
+            md = md.replace(RegExp(displayPattern, "gms"), function(m, p1) {
+                const p1e = AllHtmlEntities.encode(p1);
+                return `<div data-mx-maths="${p1e}">\n\n</div>\n\n`;
+            });
+
+            md = md.replace(RegExp(inlinePattern, "gms"), function(m, p1, p2) {
+                const p2e = AllHtmlEntities.encode(p2);
+                return `${p1}<span data-mx-maths="${p2e}"></span>`;
+            });
+        }

        // make sure div tags always start on a new line, otherwise it will confuse
        // the markdown parser
@ -69,7 +102,21 @@ export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} =
    if (!parser.isPlainText() || forceHTML) {
        // feed Markdown output to HTML parser
        const phtml = cheerio.load(parser.toHTML(),
-            { _useHtmlParser2: true, decodeEntities: false })
+            { _useHtmlParser2: true, decodeEntities: false });
+
+        if (SettingsStore.getValue("feature_latex_maths")) {
+            // original Markdown without LaTeX replacements
+            const parserOrig = new Markdown(orig);
+            const phtmlOrig = cheerio.load(parserOrig.toHTML(),
+                { _useHtmlParser2: true, decodeEntities: false });
+
+            // since maths delimiters are handled before Markdown,
+            // code blocks could contain mangled content.
+            // replace code blocks with original content
+            phtml('code').contents('div, span').each(function(i) {
+                const origData = phtmlOrig('code').contents('div, span')[i].data;
+                phtml('code').contents('div, span')[i].data = origData;
+            });

            // add fallback output for latex math, which should not be interpreted as markdown
            phtml('div, span').each(function(i, e) {
@ -78,6 +125,7 @@ export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} =
                    phtml(e).html(`<code>${tex}</code>`)
                }
            });
+        }
        return phtml.html();
    }
    // ensure removal of escape backslashes in non-Markdown messages
@ -86,6 +134,12 @@ export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} =
    }
 }

+export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} = {}) {
+    let md = mdSerialize(model);
+
+    return markdownSerializeIfNeeded(md, {forceHTML: forceHTML});
+}
+
 export function textSerialize(model: EditorModel) {
    return model.parts.reduce((text, part) => {
        switch (part.type) {
--- a/src/i18n/strings/en_EN.json
+++ b/src/i18n/strings/en_EN.json
@ -416,6 +416,7 @@
    "Prepends ( ͡° ͜ʖ ͡°) to a plain-text message": "Prepends ( ͡° ͜ʖ ͡°) to a plain-text message",
    "Sends a message as plain text, without interpreting it as markdown": "Sends a message as plain text, without interpreting it as markdown",
    "Sends a message as html, without interpreting it as markdown": "Sends a message as html, without interpreting it as markdown",
+    "Sends a message in TeX mode, using $ and $$ delimiters for maths": "Sends a message in TeX mode, using $ and $$ delimiters for maths",
    "Searches DuckDuckGo for results": "Searches DuckDuckGo for results",
    "/ddg is not a command": "/ddg is not a command",
    "To use it, just wait for autocomplete results to load and tab through them.": "To use it, just wait for autocomplete results to load and tab through them.",