element-web/src/editor/serialize.ts

/*
Copyright 2019 New Vector Ltd
Copyright 2019, 2020 The Matrix.org Foundation C.I.C.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

import Markdown from '../Markdown';
import {makeGenericPermalink} from "../utils/permalinks/Permalinks";
import EditorModel from "./model";
import { AllHtmlEntities } from 'html-entities';
import SettingsStore from '../settings/SettingsStore';
import SdkConfig from '../SdkConfig';
import cheerio from 'cheerio';

/**
 * Serialises the parts of an editor model into a Markdown source string.
 *
 * - "newline" parts become a line feed.
 * - "plain", "command", "pill-candidate" and "at-room-pill" parts contribute
 *   their text verbatim.
 * - "room-pill" and "user-pill" parts become a Markdown link whose label is the
 *   part text (with `[`, `]` and `\` backslash-escaped so the label cannot
 *   terminate the link early) and whose target is the generic permalink for
 *   the part's resourceId.
 * - Any other part type contributes nothing.
 *
 * @param model the editor model whose parts are serialised in order
 * @returns the concatenated Markdown; "" for a model without parts
 */
export function mdSerialize(model: EditorModel): string {
    return model.parts.reduce((html, part) => {
        switch (part.type) {
            case "newline":
                return html + "\n";
            case "plain":
            case "command":
            case "pill-candidate":
            case "at-room-pill":
                return html + part.text;
            case "room-pill":
            case "user-pill": {
                const label = part.text.replace(/[[\\\]]/g, c => "\\" + c);
                return html + `[${label}](${makeGenericPermalink(part.resourceId)})`;
            }
            default:
                // Without this branch the reducer would return undefined for an
                // unhandled part type and every later concatenation would start
                // from the string "undefined". Keep the accumulator intact instead.
                return html;
        }
    }, "");
}

/**
 * Serialises the editor model to an HTML body, but only when one is needed.
 *
 * The model is first rendered to Markdown with mdSerialize(). When the
 * "feature_latex_maths" setting is enabled, maths delimiters ($...$, $$...$$,
 * \(...\) and \[...\]) are rewritten, before the Markdown parser runs, into
 * <span>/<div> elements that carry the entity-encoded TeX source in a
 * data-mx-maths attribute. The detection patterns can be overridden through
 * the "latex_maths_delims" object of the SDK config.
 *
 * @param model the editor model to serialise
 * @param options.forceHTML when true, produce HTML even if the Markdown parser
 *     reports the input as plain text
 * @returns the HTML string when the Markdown is not plain text (or forceHTML is
 *     set); otherwise the parser's plain-text output if the Markdown source
 *     contains a backslash; otherwise undefined.
 */
export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} = {}): string | undefined {
    // untouched copy of the raw Markdown, used later to undo maths replacements inside code
    const orig = mdSerialize(model);
    let md = orig;

    const mathsEnabled = SettingsStore.getValue("feature_latex_maths");
    const cheerioOptions = { _useHtmlParser2: true, decodeEntities: false };

    if (mathsEnabled) {
        // optional pattern overrides from the SDK config, looked up once
        const delims = SdkConfig.get()['latex_maths_delims'] || {};

        // p1 is whatever preceded the opening delimiter (re-emitted unchanged),
        // p2 is the TeX source between the delimiters
        const toDisplayMaths = (m: string, p1: string, p2: string) => {
            const p2e = AllHtmlEntities.encode(p2);
            return `${p1}<div data-mx-maths="${p2e}">\n\n</div>\n\n`;
        };
        const toInlineMaths = (m: string, p1: string, p2: string) => {
            const p2e = AllHtmlEntities.encode(p2);
            return `${p1}<span data-mx-maths="${p2e}"></span>`;
        };

        // detect math with tex delimiters, inline: $...$, display $$...$$
        // negative lookbehinds would be preferable here, but they are not
        // supported in all major browsers:
        // const displayPattern = "^(?<!\\\\)\\$\\$(?![ \\t])(([^$]|\\\\\\$)+?)\\$\\$$";
        // const inlinePattern = "(?:^|\\s)(?<!\\\\)\\$(?!\\s)(([^$]|\\\\\\$)+?)(?<!\\\\|\\s)\\$";

        // conditions for display math detection $$...$$:
        // - pattern starts at beginning of line or is not prefixed with backslash or dollar
        // - left delimiter ($$) is not escaped by backslash
        const displayPatternAlternative = delims['display_pattern_alternative'] ||
            "(^|[^\\\\$])\\$\\$(([^$]|\\\\\\$)+?)\\$\\$";

        // conditions for inline math detection $...$:
        // - pattern starts at beginning of line, follows whitespace character or punctuation
        // - pattern is on a single line
        // - left and right delimiters ($) are not escaped by backslashes
        // - left delimiter is not followed by whitespace character
        // - right delimiter is not prefixed with whitespace character
        const inlinePatternAlternative = delims['inline_pattern_alternative'] ||
            "(^|\\s|[.,!?:;])(?!\\\\)\\$(?!\\s)(([^$\\n]|\\\\\\$)*([^\\\\\\s\\$]|\\\\\\$)(?:\\\\\\$)?)\\$";

        md = md.replace(RegExp(displayPatternAlternative, "gm"), toDisplayMaths);
        md = md.replace(RegExp(inlinePatternAlternative, "gm"), toInlineMaths);

        // detect math with latex delimiters, inline: \(...\), display \[...\]

        // conditions for display math detection \[...\]:
        // - pattern starts at beginning of line or is not prefixed with backslash
        // - pattern is not empty
        const displayPattern = delims['display_pattern'] ||
            "(^|[^\\\\])\\\\\\[(?!\\\\\\])(.*?)\\\\\\]";

        // conditions for inline math detection \(...\):
        // - pattern starts at beginning of line or is not prefixed with backslash
        // - pattern is not empty
        const inlinePattern = delims['inline_pattern'] ||
            "(^|[^\\\\])\\\\\\((?!\\\\\\))(.*?)\\\\\\)";

        // the "s" flag lets "." span newlines, so these may match across lines
        md = md.replace(RegExp(displayPattern, "gms"), toDisplayMaths);
        md = md.replace(RegExp(inlinePattern, "gms"), toInlineMaths);

        // make sure div tags always start on a new line, otherwise it will confuse
        // the markdown parser
        md = md.replace(/(.)<div/g, "$1\n<div");
    }

    const parser = new Markdown(md);
    if (!parser.isPlainText() || forceHTML) {
        // feed Markdown output to HTML parser
        const phtml = cheerio.load(parser.toHTML(), cheerioOptions);

        if (mathsEnabled) {
            // original Markdown without LaTeX replacements
            const parserOrig = new Markdown(orig);
            const phtmlOrig = cheerio.load(parserOrig.toHTML(), cheerioOptions);

            // since maths delimiters are handled before Markdown,
            // code blocks could contain mangled content.
            // replace code blocks with original content
            const codeNodes = phtml('code').contents('div, span');
            const origCodeNodes = phtmlOrig('code').contents('div, span');
            codeNodes.each((i) => {
                const origNode = origCodeNodes[i];
                // The maths rewrite can change how the Markdown is parsed, so the two
                // renders are not guaranteed to hold the same number of code nodes;
                // skip nodes without a counterpart instead of throwing on undefined.
                if (origNode) {
                    codeNodes[i].data = origNode.data;
                }
            });

            // add fallback output for latex math, which should not be interpreted as markdown
            phtml('div, span').each((i, e) => {
                const tex = phtml(e).attr('data-mx-maths');
                if (tex) {
                    phtml(e).html(`<code>${tex}</code>`);
                }
            });
        }
        return phtml.html();
    }
    // ensure removal of escape backslashes in non-Markdown messages
    if (md.includes("\\")) {
        return parser.toPlaintext();
    }
    // plain text without escapes: no formatted body is needed
    return undefined;
}

/**
 * Serialises the parts of an editor model into a plain-text string.
 *
 * "newline" parts become a line feed; every other handled part type
 * ("plain", "command", "pill-candidate", "at-room-pill", "room-pill",
 * "user-pill") contributes its text as-is, so pills fall back to their
 * displayed text. Any other part type contributes nothing.
 *
 * @param model the editor model whose parts are serialised in order
 * @returns the concatenated text; "" for a model without parts
 */
export function textSerialize(model: EditorModel): string {
    return model.parts.reduce((text, part) => {
        switch (part.type) {
            case "newline":
                return text + "\n";
            case "plain":
            case "command":
            case "pill-candidate":
            case "at-room-pill":
            case "room-pill":
            case "user-pill":
                return text + part.text;
            default:
                // Without this branch the reducer would return undefined for an
                // unhandled part type and the rest of the output would be built
                // on the string "undefined". Keep the accumulator intact instead.
                return text;
        }
    }, "");
}

/**
 * Tells whether the model begins with the emote command "/me "
 * (compared case-insensitively via startsWith()).
 */
export function containsEmote(model: EditorModel) {
    const emotePrefix = "/me ";
    const caseSensitive = false;
    return startsWith(model, emotePrefix, caseSensitive);
}

/**
 * Tells whether the first part of the model is a "plain" or "command" part
 * whose text starts with the given prefix.
 *
 * Only the first part is inspected; a prefix that would continue into a
 * second part never matches.
 *
 * @param model the editor model to inspect
 * @param prefix the text the first part must start with
 * @param caseSensitive when false, both sides are lower-cased before comparing
 * @returns true on a match, false otherwise (including for an empty model)
 */
export function startsWith(model: EditorModel, prefix: string, caseSensitive = true): boolean {
    const firstPart = model.parts[0];
    // An empty model has no first part. Bail out before touching its text:
    // lower-casing the text of a missing part used to throw a TypeError.
    if (!firstPart) {
        return false;
    }
    // part type will be "plain" while editing,
    // and "command" while composing a message.
    if (firstPart.type !== "plain" && firstPart.type !== "command") {
        return false;
    }

    const text = caseSensitive ? firstPart.text : firstPart.text.toLowerCase();
    const wanted = caseSensitive ? prefix : prefix.toLowerCase();
    return text.startsWith(wanted);
}

/**
 * Returns the result of stripPrefix() for the emote command "/me ",
 * i.e. the model with that leading command trimmed off.
 */
export function stripEmoteCommand(model: EditorModel) {
    const emoteCommand = "/me ";
    return stripPrefix(model, emoteCommand);
}

/**
 * Produces a clone of the model and asks it to remove `prefix.length`
 * characters starting at the very beginning (part index 0, offset 0).
 * The prefix is used for its length only; it is not compared with the
 * model's text. The model passed in is left untouched.
 *
 * @param model the editor model to copy
 * @param prefix the prefix whose length determines how much is removed
 * @returns the modified clone
 */
export function stripPrefix(model: EditorModel, prefix: string) {
    const stripped = model.clone();
    const start = {index: 0, offset: 0};
    stripped.removeText(start, prefix.length);
    return stripped;
}

/**
 * Undoes the escaping of a leading slash: when the first part is a "plain"
 * part whose text starts with "\/", a clone of the model with one character
 * removed from the very start is returned. In every other case (no parts,
 * different first part type, no leading "\/") the model is returned as-is.
 *
 * @param model the editor model to inspect
 * @returns the unescaped clone, or the original model when nothing changed
 */
export function unescapeMessage(model: EditorModel) {
    const parts = model.parts;
    if (!parts.length) {
        return model;
    }

    const head = parts[0];
    // only unescape \/ to / at start of editor
    const hasEscapedSlash = head.type === "plain" && head.text.startsWith("\\/");
    if (!hasEscapedSlash) {
        return model;
    }

    const unescaped = model.clone();
    unescaped.removeText({index: 0, offset: 0}, 1);
    return unescaped;
}
add matrix foundation copyright header 2019-05-22 16:16:32 +02:00			`/*`
			`Copyright 2019 New Vector Ltd`
Convert serialize and deserialize to TypeScript Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> 2020-04-15 01:49:08 +02:00			`Copyright 2019, 2020 The Matrix.org Foundation C.I.C.`
add matrix foundation copyright header 2019-05-22 16:16:32 +02:00
			`Licensed under the Apache License, Version 2.0 (the "License");`
			`you may not use this file except in compliance with the License.`
			`You may obtain a copy of the License at`

			`http://www.apache.org/licenses/LICENSE-2.0`

			`Unless required by applicable law or agreed to in writing, software`
			`distributed under the License is distributed on an "AS IS" BASIS,`
			`WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`See the License for the specific language governing permissions and`
			`limitations under the License.`
			`*/`

re-apply markdown when saving a message 2019-05-21 17:59:54 +02:00			`import Markdown from '../Markdown';`
Rename RoomPermalinkCreator -> Permalinks due to scope The file handles more than just a RoomPermalinkCreator, so we should name it accordingly. 2019-10-01 04:39:58 +02:00			`import {makeGenericPermalink} from "../utils/permalinks/Permalinks";`
Convert serialize and deserialize to TypeScript Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> 2020-04-15 01:49:08 +02:00			`import EditorModel from "./model";`
send tex math as data-mx-maths attribute 2020-09-20 13:59:22 +02:00			`import { AllHtmlEntities } from 'html-entities';`
latex math as labs setting 2020-10-10 17:32:49 +02:00			`import SettingsStore from '../settings/SettingsStore';`
math off by default, enable with latex_maths flag 2020-09-20 16:07:12 +02:00			`import SdkConfig from '../SdkConfig';`
add fallback output in code block AFTER markdown processing 2020-10-14 10:35:57 +02:00			`import cheerio from 'cheerio';`
re-apply markdown when saving a message 2019-05-21 17:59:54 +02:00
Convert serialize and deserialize to TypeScript Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> 2020-04-15 01:49:08 +02:00			`export function mdSerialize(model: EditorModel) {`
also serialize to text and method to tell us if we need html for model 2019-05-14 11:37:16 +02:00			`return model.parts.reduce((html, part) => {`
			`switch (part.type) {`
			`case "newline":`
re-apply markdown when saving a message 2019-05-21 17:59:54 +02:00			`return html + "\n";`
also serialize to text and method to tell us if we need html for model 2019-05-14 11:37:16 +02:00			`case "plain":`
support auto complete for /commands 2019-08-21 15:27:50 +02:00			`case "command":`
also serialize to text and method to tell us if we need html for model 2019-05-14 11:37:16 +02:00			`case "pill-candidate":`
(de)serialize at-room-pills just like pill-candidate (no html needed) 2019-06-14 18:24:36 +02:00			`case "at-room-pill":`
also serialize to text and method to tell us if we need html for model 2019-05-14 11:37:16 +02:00			`return html + part.text;`
			`case "room-pill":`
			`case "user-pill":`
Use new eslint package- fix lint issues in ts and js 2020-06-23 17:41:36 +02:00			`return html +`
			`[${part.text.replace(/[[\\\]]/g, c => "\\" + c)}](${makeGenericPermalink(part.resourceId)})`;
also serialize to text and method to tell us if we need html for model 2019-05-14 11:37:16 +02:00			`}`
			`}, "");`
			`}`

Remove /tex command 2021-01-29 13:05:49 +01:00			`export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} = {}) {`
			`let md = mdSerialize(model);`
Use LaTeX delimiters by default, add /tex command Since parsing for $'s as maths delimiters is tricky, switch the default to \(...\) for inline and \[...\] for display maths as it is used in LaTeX. Add /tex command to explicitly parse in TeX mode, which uses $...$ for inline and $$...$$ for display maths. Signed-off-by: Sven Mäder <maeder@phys.ethz.ch> 2020-12-20 23:14:56 +01:00			`// copy of raw input to remove unwanted math later`
			`const orig = md;`
send tex math as data-mx-maths attribute 2020-09-20 13:59:22 +02:00
latex math as labs setting 2020-10-10 17:32:49 +02:00			`if (SettingsStore.getValue("feature_latex_maths")) {`
Remove /tex command 2021-01-29 13:05:49 +01:00			`// detect math with tex delimiters, inline: $...$, display $$...$$`
			`// preferably use negative lookbehinds, not supported in all major browsers:`
			`// const displayPattern = "^(?<!\\\\)\\$\\$(?![ \\t])(([^$]\|\\\\\\$)+?)\\$\\$$";`
			`// const inlinePattern = "(?:^\|\\s)(?<!\\\\)\\$(?!\\s)(([^$]\|\\\\\\$)+?)(?<!\\\\\|\\s)\\$";`

			`// conditions for display math detection $$...$$:`
			`// - pattern starts at beginning of line or is not prefixed with backslash or dollar`
			`// - left delimiter ($$) is not escaped by backslash`
Add config keys for alternative patterns 2021-01-29 15:49:20 +01:00			`const displayPatternAlternative = (SdkConfig.get()['latex_maths_delims'] \|\|`
			`{})['display_pattern_alternative'] \|\|`
			`"(^\|[^\\\\$])\\$\\$(([^$]\|\\\\\\$)+?)\\$\\$";`
Remove /tex command 2021-01-29 13:05:49 +01:00
			`// conditions for inline math detection $...$:`
			`// - pattern starts at beginning of line, follows whitespace character or punctuation`
			`// - pattern is on a single line`
			`// - left and right delimiters ($) are not escaped by backslashes`
			`// - left delimiter is not followed by whitespace character`
			`// - right delimiter is not prefixed with whitespace character`
Add config keys for alternative patterns 2021-01-29 15:49:20 +01:00			`const inlinePatternAlternative = (SdkConfig.get()['latex_maths_delims'] \|\|`
			`{})['inline_pattern_alternative'] \|\|`
			`"(^\|\\s\|[.,!?:;])(?!\\\\)\\$(?!\\s)(([^$\\n]\|\\\\\\$)*([^\\\\\\s\\$]\|\\\\\\$)(?:\\\\\\$)?)\\$";`
Remove /tex command 2021-01-29 13:05:49 +01:00
Add config keys for alternative patterns 2021-01-29 15:49:20 +01:00			`md = md.replace(RegExp(displayPatternAlternative, "gm"), function(m, p1, p2) {`
Remove /tex command 2021-01-29 13:05:49 +01:00			`const p2e = AllHtmlEntities.encode(p2);`
			return `${p1}<div data-mx-maths="${p2e}">\n\n</div>\n\n`;
			`});`

Add config keys for alternative patterns 2021-01-29 15:49:20 +01:00			`md = md.replace(RegExp(inlinePatternAlternative, "gm"), function(m, p1, p2) {`
Remove /tex command 2021-01-29 13:05:49 +01:00			`const p2e = AllHtmlEntities.encode(p2);`
			return `${p1}<span data-mx-maths="${p2e}"></span>`;
			`});`

			`// detect math with latex delimiters, inline: \(...\), display \[...\]`

			`// conditions for display math detection \[...\]:`
			`// - pattern starts at beginning of line or is not prefixed with backslash`
			`// - pattern is not empty`
			`const displayPattern = (SdkConfig.get()['latex_maths_delims'] \|\| {})['display_pattern'] \|\|`
			`"(^\|[^\\\\])\\\\\\[(?!\\\\\\])(.*?)\\\\\\]";`

			`// conditions for inline math detection \(...\):`
			`// - pattern starts at beginning of line or is not prefixed with backslash`
			`// - pattern is not empty`
			`const inlinePattern = (SdkConfig.get()['latex_maths_delims'] \|\| {})['inline_pattern'] \|\|`
			`"(^\|[^\\\\])\\\\\\((?!\\\\\\))(.*?)\\\\\\)";`

			`md = md.replace(RegExp(displayPattern, "gms"), function(m, p1, p2) {`
			`const p2e = AllHtmlEntities.encode(p2);`
			return `${p1}<div data-mx-maths="${p2e}">\n\n</div>\n\n`;
			`});`

			`md = md.replace(RegExp(inlinePattern, "gms"), function(m, p1, p2) {`
			`const p2e = AllHtmlEntities.encode(p2);`
			return `${p1}<span data-mx-maths="${p2e}"></span>`;
			`});`
generate valid block html for commonmark spec 2020-10-25 19:32:24 +01:00
			`// make sure div tags always start on a new line, otherwise it will confuse`
			`// the markdown parser`
			md = md.replace(/(.)<div/g, function(m, p1) { return `${p1}\n<div`; });
send tex math as data-mx-maths attribute 2020-09-20 13:59:22 +02:00			`}`

re-apply markdown when saving a message 2019-05-21 17:59:54 +02:00			`const parser = new Markdown(md);`
turn flag into options object 2019-07-08 16:55:56 +02:00			`if (!parser.isPlainText() \|\| forceHTML) {`
add fallback output in code block AFTER markdown processing 2020-10-14 10:35:57 +02:00			`// feed Markdown output to HTML parser`
			`const phtml = cheerio.load(parser.toHTML(),`
Use LaTeX delimiters by default, add /tex command Since parsing for $'s as maths delimiters is tricky, switch the default to \(...\) for inline and \[...\] for display maths as it is used in LaTeX. Add /tex command to explicitly parse in TeX mode, which uses $...$ for inline and $$...$$ for display maths. Signed-off-by: Sven Mäder <maeder@phys.ethz.ch> 2020-12-20 23:14:56 +01:00			`{ _useHtmlParser2: true, decodeEntities: false });`

			`if (SettingsStore.getValue("feature_latex_maths")) {`
			`// original Markdown without LaTeX replacements`
			`const parserOrig = new Markdown(orig);`
			`const phtmlOrig = cheerio.load(parserOrig.toHTML(),`
			`{ _useHtmlParser2: true, decodeEntities: false });`

			`// since maths delimiters are handled before Markdown,`
			`// code blocks could contain mangled content.`
			`// replace code blocks with original content`
			`phtml('code').contents('div, span').each(function(i) {`
			`const origData = phtmlOrig('code').contents('div, span')[i].data;`
			`phtml('code').contents('div, span')[i].data = origData;`
			`});`

			`// add fallback output for latex math, which should not be interpreted as markdown`
			`phtml('div, span').each(function(i, e) {`
			`const tex = phtml(e).attr('data-mx-maths')`
			`if (tex) {`
			phtml(e).html(`<code>${tex}</code>`)
			`}`
			`});`
			`}`
add fallback output in code block AFTER markdown processing 2020-10-14 10:35:57 +02:00			`return phtml.html();`
re-apply markdown when saving a message 2019-05-21 17:59:54 +02:00			`}`
Remove escape backslashes in non-Markdown messages 2020-06-03 23:36:48 +02:00			`// ensure removal of escape backslashes in non-Markdown messages`
			`if (md.indexOf("\\") > -1) {`
			`return parser.toPlaintext();`
			`}`
re-apply markdown when saving a message 2019-05-21 17:59:54 +02:00			`}`

Convert serialize and deserialize to TypeScript Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> 2020-04-15 01:49:08 +02:00			`export function textSerialize(model: EditorModel) {`
also serialize to text and method to tell us if we need html for model 2019-05-14 11:37:16 +02:00			`return model.parts.reduce((text, part) => {`
			`switch (part.type) {`
			`case "newline":`
			`return text + "\n";`
			`case "plain":`
support auto complete for /commands 2019-08-21 15:27:50 +02:00			`case "command":`
also serialize to text and method to tell us if we need html for model 2019-05-14 11:37:16 +02:00			`case "pill-candidate":`
(de)serialize at-room-pills just like pill-candidate (no html needed) 2019-06-14 18:24:36 +02:00			`case "at-room-pill":`
also serialize to text and method to tell us if we need html for model 2019-05-14 11:37:16 +02:00			`return text + part.text;`
			`case "room-pill":`
			`case "user-pill":`
put display name in user pill text fallback instead of mxid 2019-08-30 11:51:29 +02:00			return text + `${part.text}`;
also serialize to text and method to tell us if we need html for model 2019-05-14 11:37:16 +02:00			`}`
			`}, "");`
			`}`
add support for emotes and running /commands this does not yet include autocomplete for commands 2019-08-21 11:26:21 +02:00
Convert serialize and deserialize to TypeScript Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> 2020-04-15 01:49:08 +02:00			`export function containsEmote(model: EditorModel) {`
avoid negatives 2020-06-16 15:06:42 +02:00			`return startsWith(model, "/me ", false);`
Fix escaping commands using double-slash //, e.g //plain sends `/plain` 2020-01-21 16:55:21 +01:00			`}`

avoid negatives 2020-06-16 15:06:42 +02:00			`export function startsWith(model: EditorModel, prefix: string, caseSensitive = true) {`
add support for emotes and running /commands this does not yet include autocomplete for commands 2019-08-21 11:26:21 +02:00			`const firstPart = model.parts[0];`
support auto complete for /commands 2019-08-21 15:27:50 +02:00			`// part type will be "plain" while editing,`
			`// and "command" while composing a message.`
Fix case-sensitivity of /me to match rest of slash commands also better error handling for attempted runs of unimplemented commands 2020-06-16 01:41:21 +02:00			`let text = firstPart && firstPart.text;`
avoid negatives 2020-06-16 15:06:42 +02:00			`if (!caseSensitive) {`
Fix case-sensitivity of /me to match rest of slash commands also better error handling for attempted runs of unimplemented commands 2020-06-16 01:41:21 +02:00			`prefix = prefix.toLowerCase();`
			`text = text.toLowerCase();`
			`}`

			`return firstPart && (firstPart.type === "plain" \|\| firstPart.type === "command") && text.startsWith(prefix);`
add support for emotes and running /commands this does not yet include autocomplete for commands 2019-08-21 11:26:21 +02:00			`}`

Convert serialize and deserialize to TypeScript Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> 2020-04-15 01:49:08 +02:00			`export function stripEmoteCommand(model: EditorModel) {`
add support for emotes and running /commands this does not yet include autocomplete for commands 2019-08-21 11:26:21 +02:00			`// trim "/me "`
Fix escaping commands using double-slash //, e.g //plain sends `/plain` 2020-01-21 16:55:21 +01:00			`return stripPrefix(model, "/me ");`
			`}`

Convert serialize and deserialize to TypeScript Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> 2020-04-15 01:49:08 +02:00			`export function stripPrefix(model: EditorModel, prefix: string) {`
add support for emotes and running /commands this does not yet include autocomplete for commands 2019-08-21 11:26:21 +02:00			`model = model.clone();`
Fix escaping commands using double-slash //, e.g //plain sends `/plain` 2020-01-21 16:55:21 +01:00			`model.removeText({index: 0, offset: 0}, prefix.length);`
add support for emotes and running /commands this does not yet include autocomplete for commands 2019-08-21 11:26:21 +02:00			`return model;`
			`}`
allow escaping the first slash to not write a command 2019-09-02 17:53:14 +02:00
Convert serialize and deserialize to TypeScript Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> 2020-04-15 01:49:08 +02:00			`export function unescapeMessage(model: EditorModel) {`
allow escaping the first slash to not write a command 2019-09-02 17:53:14 +02:00			`const {parts} = model;`
			`if (parts.length) {`
			`const firstPart = parts[0];`
			`// only unescape \/ to / at start of editor`
			`if (firstPart.type === "plain" && firstPart.text.startsWith("\\/")) {`
			`model = model.clone();`
			`model.removeText({index: 0, offset: 0}, 1);`
			`}`
			`}`
			`return model;`
			`}`