/*
Copyright 2015, 2016 OpenMarket Ltd
Copyright 2017, 2018 New Vector Ltd
Copyright 2019 Michael Telatynski <7t3chguy@gmail.com>
Copyright 2019 The Matrix.org Foundation C.I.C.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
|
2021-06-22 18:23:13 +02:00
|
|
|
import React, { ReactNode } from 'react';
|
2019-01-31 22:26:07 +01:00
|
|
|
import sanitizeHtml from 'sanitize-html';
|
2021-06-22 18:23:13 +02:00
|
|
|
import cheerio from 'cheerio';
|
2016-07-05 00:34:57 +02:00
|
|
|
import classNames from 'classnames';
|
2020-07-08 09:40:58 +02:00
|
|
|
import EMOJIBASE_REGEX from 'emojibase-regex';
|
2020-09-19 16:30:00 +02:00
|
|
|
import katex from 'katex';
|
2020-09-20 13:59:22 +02:00
|
|
|
import { AllHtmlEntities } from 'html-entities';
|
2021-06-22 18:23:13 +02:00
|
|
|
import { IContent } from 'matrix-js-sdk/src/models/event';
|
2016-07-05 00:34:57 +02:00
|
|
|
|
2021-12-09 10:10:23 +01:00
|
|
|
import { _linkifyElement, _linkifyString } from './linkify-matrix';
|
2021-06-22 18:23:13 +02:00
|
|
|
import { IExtendedSanitizeOptions } from './@types/sanitize-html';
|
|
|
|
import SettingsStore from './settings/SettingsStore';
|
|
|
|
import { tryTransformPermalinkToLocalHref } from "./utils/permalinks/Permalinks";
|
2021-07-16 22:36:03 +02:00
|
|
|
import { getEmojiFromUnicode } from "./emoji";
|
2021-10-15 18:42:44 +02:00
|
|
|
import ReplyChain from "./components/views/elements/ReplyChain";
|
2021-06-22 18:23:13 +02:00
|
|
|
import { mediaFromMxc } from "./customisations/Media";
|
2021-12-03 15:00:56 +01:00
|
|
|
import { ELEMENT_URL_PATTERN, options as linkifyMatrixOptions } from './linkify-matrix';
|
2016-08-09 18:10:05 +02:00
|
|
|
|
2017-09-15 12:43:55 +02:00
|
|
|
// Code points beyond the basic multilingual plane are encoded as a high
// surrogate followed by a low surrogate; this matches any such pair.
const SURROGATE_PAIR_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/;

// emojibase also covers a number of symbol characters that live inside the
// BMP. This spans the ranges from 'letterlike symbols' up to 'miscellaneous
// symbols and arrows', which should catch all of them (at the cost of plenty
// of false positives, which is acceptable for a fast-path check).
const SYMBOL_PATTERN = /([\u2100-\u2bff])/;

// Matches every U+200D (zero-width joiner) and U+2003 (em space) character.
const ZWJ_REGEX = new RegExp("\u200D|\u2003", "g");

// Matches every whitespace character.
const WHITESPACE_REGEX = /\s/g;
|
|
|
|
|
2019-05-19 18:53:36 +02:00
|
|
|
// Matches a string made up solely of one or more repetitions of the
// emojibase pattern (anchored at both ends, with the case-insensitive flag).
const BIGEMOJI_REGEX = new RegExp(`^(${EMOJIBASE_REGEX.source})+$`, 'i');
|
2019-05-19 18:06:21 +02:00
|
|
|
|
2017-03-03 16:46:13 +01:00
|
|
|
// Matches exactly a '#' followed by six hexadecimal digits, e.g. "#ff00aa".
// Used to validate the data-mx-color / data-mx-bg-color attribute values.
const COLOR_REGEX = /^#[0-9a-fA-F]{6}$/;
|
2015-11-27 16:02:32 +01:00
|
|
|
|
2021-08-04 10:33:37 +02:00
|
|
|
/**
 * URL schemes (without the trailing colon) that are allowed in untrusted
 * content. The list is handed to the HTML sanitiser as its allowed schemes
 * and is also consulted by isUrlPermitted().
 */
export const PERMITTED_URL_SCHEMES = [
    "bitcoin", "ftp", "geo", "http", "https",
    "im", "irc", "ircs", "magnet", "mailto",
    "matrix", "mms", "news", "nntp", "openpgp4fpr",
    "sip", "sftp", "sms", "smsto", "ssh",
    "tel", "urn", "webcal", "wtai", "xmpp",
];
|
2018-02-09 13:20:05 +01:00
|
|
|
|
2021-07-08 15:22:38 +02:00
|
|
|
// Matches the path of an r0 media-API download or thumbnail URL. The two
// capture groups are the two path segments following download/thumbnail; the
// img transform reassembles them as `mxc://<group 1>/<group 2>`.
const MEDIA_API_MXC_REGEX = /\/_matrix\/media\/r0\/(?:download|thumbnail)\/(.+?)\/(.+?)(?:[?/]|$)/;
|
|
|
|
|
2017-09-09 00:05:27 +02:00
|
|
|
/*
 * Cheap pre-check for whether a string could contain emoji.
 *
 * This relies on two very simple patterns rather than emojibase's full regex,
 * so it reports false positives; it is only meant as a fast path for deciding
 * whether the expensive emoji handling needs to run at all.
 */
function mightContainEmoji(str: string): boolean {
    if (SURROGATE_PAIR_PATTERN.test(str)) {
        return true;
    }
    return SYMBOL_PATTERN.test(str);
}
|
|
|
|
|
2019-05-17 12:52:03 +02:00
|
|
|
/**
 * Looks up the shortcode for an emoji character.
 *
 * @param {String} char The emoji character
 * @return {String} The first known shortcode wrapped in colons (such as
 *     :thumbup:), or an empty string when the emoji is unknown or has no
 *     shortcodes.
 */
export function unicodeToShortcode(char: string): string {
    const emoji = getEmojiFromUnicode(char);
    const codes = emoji?.shortcodes;
    if (!codes?.length) {
        return '';
    }
    return `:${codes[0]}:`;
}
|
|
|
|
|
2019-05-20 11:10:30 +02:00
|
|
|
/**
 * Flattens the top-level structure of an HTML string before it is sent.
 *
 * The HTML is parsed into a detached <div>. If it has no element children the
 * parsed markup is returned as-is. Otherwise each top-level element is
 * serialised in turn: <p> elements are unwrapped to their inner HTML and
 * separated by `<br />` (no trailing break after the final element), while
 * any other element is kept whole.
 *
 * @param {string} html The HTML to process
 * @return {string} The processed HTML
 */
export function processHtmlForSending(html: string): string {
    const container = document.createElement('div');
    container.innerHTML = html;

    const topLevel = Array.from(container.children);
    if (topLevel.length === 0) {
        return container.innerHTML;
    }

    const lastIndex = topLevel.length - 1;
    return topLevel.map((child, index) => {
        if (child.tagName.toLowerCase() !== 'p') {
            // Serialise the element itself (not just its contents) by
            // cloning it into a throwaway wrapper.
            const wrapper = document.createElement('div');
            wrapper.appendChild(child.cloneNode(true));
            return wrapper.innerHTML;
        }
        // Don't add a <br /> when the <p> is the last top-level element
        return index === lastIndex ? child.innerHTML : child.innerHTML + '<br />';
    }).join('');
}
|
|
|
|
|
2017-06-23 18:02:54 +02:00
|
|
|
/*
 * Sanitises an untrusted HTML string with the standard sanitiser parameters
 * and wraps the result in a React <div> node.
 */
export function sanitizedHtmlNode(insaneHtml: string): ReactNode {
    const markup = { __html: sanitizeHtml(insaneHtml, sanitizeHtmlParams) };

    return <div dangerouslySetInnerHTML={markup} dir="auto" />;
}
|
|
|
|
|
2021-06-22 18:23:13 +02:00
|
|
|
/**
 * Runs untrusted HTML through the sanitiser with no tags, attributes or URL
 * schemes allowed and disallowed tags discarded.
 *
 * @param {string} insaneHtml The untrusted HTML
 * @return {string} The sanitiser's output
 */
export function getHtmlText(insaneHtml: string): string {
    const stripEverything = {
        allowedTags: [],
        allowedAttributes: {},
        selfClosing: [],
        allowedSchemes: [],
        disallowedTagsMode: 'discard',
    };
    return sanitizeHtml(insaneHtml, stripEverything);
}
|
|
|
|
|
2018-02-09 13:20:05 +01:00
|
|
|
/**
 * Decides whether a URL from an untrusted source is safe to place in the DOM.
 * The main thing being guarded against is javascript: URIs.
 *
 * The HTML sanitiser library performs its own equivalent check, fed from the
 * same scheme list; this helper exists for the other places that need to
 * vet a URL.
 *
 * @return true if the URL parses and its scheme is permitted, otherwise false
 */
export function isUrlPermitted(inputUrl: string): boolean {
    let protocol: string;
    try {
        protocol = new URL(inputUrl).protocol;
    } catch (e) {
        // not a parseable URL
        return false;
    }
    // The parsed protocol carries a trailing colon, which the list does not
    return PERMITTED_URL_SCHEMES.includes(protocol.slice(0, -1));
}
|
|
|
|
|
2020-09-22 19:06:10 +02:00
|
|
|
const transformTags: IExtendedSanitizeOptions["transformTags"] = { // custom to matrix
    // Open hyperlinks in a new tab, except permalinks / vector URLs which are
    // rewritten to local hrefs and handled in-app.
    'a': (tagName: string, attribs: sanitizeHtml.Attributes) => {
        const originalHref = attribs.href;
        if (originalHref) {
            const localHref = tryTransformPermalinkToLocalHref(originalHref);
            if (localHref !== originalHref || originalHref.match(ELEMENT_URL_PATTERN)) {
                attribs.href = localHref;
                delete attribs.target;
            } else {
                attribs.target = '_blank';
            }
        }
        attribs.rel = 'noreferrer noopener'; // https://mathiasbynens.github.io/rel-noopener/
        return { tagName, attribs };
    },

    // Only `mxc` images survive. This is enforced here rather than through
    // allowedSchemesByTag because transformTags runs _before_ that filtering
    // and images with `https?` sources must not be allowed through.
    // Inline images are also dropped entirely (as though they were absent)
    // when the "show images" preference is off. Future work might expose some
    // UI to reveal them like standalone image events have.
    'img': (tagName: string, attribs: sanitizeHtml.Attributes) => {
        if (!attribs.src || !SettingsStore.getValue("showImages")) {
            return { tagName, attribs: {} };
        }

        let mxcUrl = attribs.src;
        if (!mxcUrl.startsWith("mxc://")) {
            // Accept media-API download/thumbnail URLs by mapping them back
            // to the mxc URI they refer to; anything else is stripped.
            const mediaApiMatch = MEDIA_API_MXC_REGEX.exec(mxcUrl);
            if (!mediaApiMatch) {
                return { tagName, attribs: {} };
            }
            mxcUrl = `mxc://${mediaApiMatch[1]}/${mediaApiMatch[2]}`;
        }

        const width = Math.min(Number(attribs.width) || 800, 800);
        const height = Math.min(Number(attribs.height) || 600, 600);
        // width/height become maximums rather than absolute sizes so that
        // object-fit can do its thing. Only our own styles are allowed on
        // this tag, so the attribute is overwritten outright.
        attribs.style = `max-width: ${width}px; max-height: ${height}px;`;

        attribs.src = mediaFromMxc(mxcUrl).getThumbnailOfSourceHttp(width, height);
        return { tagName, attribs };
    },

    'code': (tagName: string, attribs: sanitizeHtml.Attributes) => {
        if (typeof attribs.class !== 'undefined') {
            // Keep only the language-* classes used for syntax highlighting
            // (excluding ones starting with language-_).
            attribs.class = attribs.class
                .split(/\s/)
                .filter((name) => name.startsWith('language-') && !name.startsWith('language-_'))
                .join(' ');
        }
        return { tagName, attribs };
    },

    '*': (tagName: string, attribs: sanitizeHtml.Attributes) => {
        // `style` has to be an allowed attribute for font, span & img because
        // attributes are stripped after transforming, so remove whatever
        // style was supplied here. img is exempt: its style is generated
        // wholly by the img transformation above and is therefore trusted.
        if (tagName !== "img") {
            delete attribs.style;
        }

        // Validate data-mx-color and data-mx-bg-color and convert them into
        // their CSS equivalents.
        const cssPropertyByCustomAttribute = {
            'data-mx-color': 'color',
            'data-mx-bg-color': 'background-color',
            // $customAttributeKey: $cssAttributeKey
        };

        let style = "";
        for (const [customAttribute, cssProperty] of Object.entries(cssPropertyByCustomAttribute)) {
            const value = attribs[customAttribute];
            if (value && typeof value === 'string' && COLOR_REGEX.test(value)) {
                style += cssProperty + ":" + value + ";";
                delete attribs[customAttribute];
            }
        }

        if (style) {
            attribs.style = style + (attribs.style || "");
        }

        return { tagName, attribs };
    },
};
|
|
|
|
|
2020-09-22 19:06:10 +02:00
|
|
|
// Sanitiser configuration applied to untrusted message HTML.
const sanitizeHtmlParams: IExtendedSanitizeOptions = {
    allowedTags: [
        'font', // custom to matrix for IRC-style font coloring
        'del', // for markdown
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'blockquote', 'p', 'a', 'ul', 'ol', 'sup', 'sub',
        'nl', 'li', 'b', 'i', 'u', 'strong', 'em', 'strike',
        'code', 'hr', 'br', 'div',
        'table', 'thead', 'caption', 'tbody', 'tr', 'th', 'td',
        'pre', 'span', 'img',
        'details', 'summary',
    ],
    allowedAttributes: {
        // Attributes are sanitised after the transformations have run, so
        // `style` must be accepted for font, span & img here; it is stripped
        // (or regenerated) during the transformation instead.
        // custom ones first:
        font: ['color', 'data-mx-bg-color', 'data-mx-color', 'style'], // custom to matrix
        span: ['data-mx-maths', 'data-mx-bg-color', 'data-mx-color', 'data-mx-spoiler', 'style'], // custom to matrix
        div: ['data-mx-maths'],
        a: ['href', 'name', 'target', 'rel'], // remote target: custom to matrix
        // img tags also accept width/height; those are mapped to max-width &
        // max-height during transformation
        img: ['src', 'alt', 'title', 'style'],
        ol: ['start'],
        code: ['class'], // not every class is allowed: they are filtered in transformTags
    },
    // Many of these never occur in practice because the tags are not allowed
    selfClosing: ['img', 'br', 'hr', 'area', 'base', 'basefont', 'input', 'link', 'meta'],
    // URL schemes we permit
    allowedSchemes: PERMITTED_URL_SCHEMES,
    allowProtocolRelative: false,
    transformTags,
    // 50 levels deep "should be enough for anyone"
    nestingLimit: 50,
};
|
2016-08-15 22:37:26 +02:00
|
|
|
|
2018-07-18 11:10:42 +02:00
|
|
|
// Identical to sanitizeHtmlParams except that less rewriting is done: only
// the 'code' and '*' tag transformations are applied.
const composerSanitizeHtmlParams: IExtendedSanitizeOptions = {
    ...sanitizeHtmlParams,
    transformTags: {
        "code": transformTags["code"],
        "*": transformTags["*"],
    },
};
|
|
|
|
|
2020-07-08 09:40:58 +02:00
|
|
|
/**
 * Shared search-highlighting logic. Subclasses decide what a (highlighted or
 * plain) snippet is turned into by implementing processSnippet().
 */
abstract class BaseHighlighter<T extends React.ReactNode> {
    constructor(public highlightClass: string, public highlightLink: string) {
    }

    /**
     * apply the highlights to a section of text
     *
     * The first highlight is searched for case-insensitively; the text
     * between its matches is then searched for the remaining highlights.
     *
     * @param {string} safeSnippet The snippet of text to apply the highlights
     *     to.
     * @param {string[]} safeHighlights A list of substrings to highlight,
     *     sorted by descending length. Missing or empty entries are skipped.
     *
     * returns a list of results (strings for HtmlHighlighter, react nodes for
     * TextHighlighter).
     */
    public applyHighlights(safeSnippet: string, safeHighlights: string[]): T[] {
        const safeHighlight = safeHighlights[0];
        if (!safeHighlight) {
            // Nothing to search for at this level. An empty list would
            // otherwise throw on `undefined.toLowerCase()`, and an empty
            // string would match at every offset without ever advancing,
            // looping forever. Move straight on to any remaining highlights.
            return this.applySubHighlights(safeSnippet, safeHighlights);
        }

        // Lower-case both sides once, rather than on every loop iteration.
        // NOTE(review): offsets found in the lower-cased text are applied to
        // the original text, which assumes lower-casing does not change the
        // string's length - TODO confirm this is acceptable for all inputs.
        const haystack = safeSnippet.toLowerCase();
        const needle = safeHighlight.toLowerCase();

        const nodes: T[] = [];
        let lastOffset = 0;
        let offset: number;

        while ((offset = haystack.indexOf(needle, lastOffset)) >= 0) {
            // handle preamble
            if (offset > lastOffset) {
                const subSnippet = safeSnippet.substring(lastOffset, offset);
                nodes.push(...this.applySubHighlights(subSnippet, safeHighlights));
            }

            // do highlight. use the original string rather than safeHighlight
            // to preserve the original casing.
            const endOffset = offset + safeHighlight.length;
            nodes.push(this.processSnippet(safeSnippet.substring(offset, endOffset), true));

            lastOffset = endOffset;
        }

        // handle postamble
        if (lastOffset !== safeSnippet.length) {
            const subSnippet = safeSnippet.substring(lastOffset);
            nodes.push(...this.applySubHighlights(subSnippet, safeHighlights));
        }
        return nodes;
    }

    /**
     * Processes a range of text that did not match the first highlight:
     * either searches it for the remaining highlights or, when none are
     * left, returns it as a single unhighlighted result.
     */
    private applySubHighlights(safeSnippet: string, safeHighlights: string[]): T[] {
        if (safeHighlights.length > 1) {
            // recurse into this range to check for the next set of highlight matches
            return this.applyHighlights(safeSnippet, safeHighlights.slice(1));
        }
        // no more highlights to be found, just return the unhighlighted string
        return [this.processSnippet(safeSnippet, false)];
    }

    /**
     * Converts a snippet of text into a result.
     *
     * @param snippet the text to convert
     * @param highlight true if the snippet is a search match
     */
    protected abstract processSnippet(snippet: string, highlight: boolean): T;
}
|
|
|
|
|
2020-07-08 09:40:58 +02:00
|
|
|
class HtmlHighlighter extends BaseHighlighter<string> {
    /* Wraps a snippet in highlight markup when it is a search match.
     *
     * snippet: content of the span; must have been sanitised
     * highlight: true to highlight as a search match
     *
     * returns an HTML string: the snippet unchanged when not highlighted,
     * otherwise a <span> with the highlight class, additionally wrapped in
     * an <a> when a highlight link is set.
     */
    protected processSnippet(snippet: string, highlight: boolean): string {
        if (!highlight) {
            // plain text needs no markup
            return snippet;
        }

        const highlighted = `<span class="${this.highlightClass}">${snippet}</span>`;
        return this.highlightLink
            ? `<a href="${encodeURI(this.highlightLink)}">${highlighted}</a>`
            : highlighted;
    }
}
|
|
|
|
|
2020-07-08 09:40:58 +02:00
|
|
|
/** Options accepted by bodyToHtml(). */
interface IOpts {
    /** href to add to highlighted words */
    highlightLink?: string;
    /** disables the big emoji class */
    disableBigEmoji?: boolean;
    /** the event is a reply, so the reply fallback needs removing */
    stripReplyFallback?: boolean;
    /** return an HTML string rather than JSX elements */
    returnString?: boolean;
    /** lessen the url rewriting done by sanitization, for quoting into the composer */
    forComposerQuote?: boolean;
    /** React ref to attach to any React components returned (not compatible with returnString) */
    ref?: React.Ref<HTMLSpanElement>;
}

/** Options selecting the overload of bodyToHtml() that returns a React node. */
export interface IOptsReturnNode extends IOpts {
    returnString: false | undefined;
}

/** Options selecting the overload of bodyToHtml() that returns an HTML string. */
export interface IOptsReturnString extends IOpts {
    returnString: true;
}
|
|
|
|
|
2018-05-21 04:48:59 +02:00
|
|
|
/* turn a matrix event body into html
 *
 * content: 'content' of the MatrixEvent
 *
 * highlights: optional list of words to highlight, ordered by longest word first
 *
 * opts.highlightLink: optional href to add to highlighted words
 * opts.disableBigEmoji: optional argument to disable the big emoji class.
 * opts.stripReplyFallback: optional argument specifying the event is a reply and so fallback needs removing
 * opts.returnString: return an HTML string rather than JSX elements
 * opts.forComposerQuote: optional param to lessen the url rewriting done by sanitization, for quoting into composer
 * opts.ref: React ref to attach to any React components returned (not compatible with opts.returnString)
 *
 * The message is rendered from its formatted_body only when the format is
 * org.matrix.custom.html and formatted_body is a non-empty string; otherwise
 * the plain body is used.
 */
export function bodyToHtml(content: IContent, highlights: string[], opts: IOptsReturnString): string;
export function bodyToHtml(content: IContent, highlights: string[], opts: IOptsReturnNode): ReactNode;
export function bodyToHtml(content: IContent, highlights: string[], opts: IOpts = {}) {
    // Event content is untrusted: only ever treat the bodies as strings if
    // they really are strings.
    const rawFormattedBody = typeof content.formatted_body === 'string' ? content.formatted_body : null;
    const plainBody = typeof content.body === 'string' ? content.body : "";
    const isFormattedBody = content.format === "org.matrix.custom.html" && !!rawFormattedBody;

    let sanitizeParams = opts.forComposerQuote ? composerSanitizeHtmlParams : sanitizeHtmlParams;

    // XXX: We sanitize the HTML whilst also highlighting its text nodes, to avoid accidentally trying
    // to highlight HTML tags themselves. However, this does mean that we don't highlight textnodes which
    // are interrupted by HTML tags (not that we did before) - e.g. foo<span/>bar won't get highlighted
    // by an attempt to search for 'foobar'. Then again, the search query probably wouldn't work either
    if (highlights && highlights.length > 0) {
        const highlighter = new HtmlHighlighter("mx_EventTile_searchHighlight", opts.highlightLink);
        const baseParams = sanitizeParams;
        const safeHighlights = highlights
            // sanitizeHtml can hang if an unclosed HTML tag is thrown at it
            // A search for `<foo` will make the browser crash
            // an alternative would be to escape HTML special characters
            // but that would bring no additional benefit as the highlighter
            // does not work with those special chars
            .filter((highlight: string): boolean => !highlight.includes("<"))
            .map((highlight: string): string => sanitizeHtml(highlight, baseParams))
            // an empty search term can never be highlighted
            .filter((highlight: string): boolean => highlight.length > 0);

        if (safeHighlights.length > 0) {
            // Highlighting is done through a textFilter on a per-call copy of
            // the parameters, leaving the shared module-level object untouched.
            sanitizeParams = {
                ...baseParams,
                textFilter: (safeText: string): string => {
                    return highlighter.applyHighlights(safeText, safeHighlights).join('');
                },
            };
        }
    }

    const formattedBody = (opts.stripReplyFallback && rawFormattedBody)
        ? ReplyChain.stripHTMLReply(rawFormattedBody)
        : rawFormattedBody;
    const strippedBody: string = opts.stripReplyFallback ? ReplyChain.stripPlainReply(plainBody) : plainBody;

    const bodyHasEmoji = mightContainEmoji(isFormattedBody ? formattedBody : plainBody);

    let isHtmlMessage = false;
    let safeBody: string;
    // Only generate safeBody if the message was sent as org.matrix.custom.html
    const isDisplayedWithHtml = isFormattedBody;
    if (isFormattedBody) {
        safeBody = sanitizeHtml(formattedBody, sanitizeParams);
        const phtml = cheerio.load(safeBody, {
            // @ts-ignore: The `_useHtmlParser2` internal option is the
            // simplest way to both parse and render using `htmlparser2`.
            _useHtmlParser2: true,
            decodeEntities: false,
        });
        // If the sanitised markup is identical to its own text content then
        // it contains no actual HTML.
        const isPlainText = phtml.html() === phtml.root().text();
        isHtmlMessage = !isPlainText;

        if (isHtmlMessage && SettingsStore.getValue("feature_latex_maths")) {
            // Only elements that actually carry a non-empty data-mx-maths
            // attribute are maths; any other <div> must be left alone.
            // @ts-ignore - The types for `replaceWith` wrongly expect
            // Cheerio instance to be returned.
            phtml('div[data-mx-maths!=""], span[data-mx-maths!=""]').replaceWith(function(i, e) {
                return katex.renderToString(
                    AllHtmlEntities.decode(phtml(e).attr('data-mx-maths')),
                    {
                        throwOnError: false,
                        // @ts-ignore - `e` can be an Element, not just a Node
                        displayMode: e.name == 'div',
                        output: "htmlAndMathml",
                    });
            });
            safeBody = phtml.html();
        }
    }

    const contentBody = isDisplayedWithHtml ? safeBody : strippedBody;
    if (opts.returnString) {
        return contentBody;
    }

    let emojiBody = false;
    if (!opts.disableBigEmoji && bodyHasEmoji) {
        let contentBodyTrimmed = contentBody !== undefined ? contentBody.trim() : '';

        // Ignore spaces in body text. Emojis with spaces in between should
        // still be counted as purely emoji messages.
        contentBodyTrimmed = contentBodyTrimmed.replace(WHITESPACE_REGEX, '');

        // Remove zero width joiner characters from emoji messages. This ensures
        // that emojis that are made up of multiple unicode characters are still
        // presented as large.
        contentBodyTrimmed = contentBodyTrimmed.replace(ZWJ_REGEX, '');

        const match = BIGEMOJI_REGEX.exec(contentBodyTrimmed);
        const isOnlyEmoji = !!match && !!match[0] && match[0].length === contentBodyTrimmed.length;
        emojiBody = isOnlyEmoji &&
            // Prevent user pills expanding for users with only emoji in
            // their username. Permalinks (links in pills) can be any URL
            // now, so we just check for an HTTP-looking thing.
            (
                strippedBody === safeBody || // replies have the html fallbacks, account for that here
                // use the type-checked value: formatted_body is untrusted
                // and may be null or some other non-string
                rawFormattedBody === null ||
                (!rawFormattedBody.includes("http:") &&
                !rawFormattedBody.includes("https:"))
            );
    }

    const className = classNames({
        'mx_EventTile_body': true,
        'mx_EventTile_bigEmoji': emojiBody,
        'markdown-body': isHtmlMessage && !emojiBody,
    });

    return isDisplayedWithHtml ?
        <span
            key="body"
            ref={opts.ref}
            className={className}
            dangerouslySetInnerHTML={{ __html: safeBody }}
            dir="auto"
        /> : <span key="body" ref={opts.ref} className={className} dir="auto">{ strippedBody }</span>;
}
|
2015-11-27 16:02:32 +01:00
|
|
|
|
2019-01-31 22:26:07 +01:00
|
|
|
/**
 * Turns the links found in a string into anchors. Thin wrapper around
 * 'linkifyjs/string'.
 *
 * @param {string} str string to linkify
 * @param {object} [options] Options for linkifyString. Default: linkifyMatrixOptions
 * @returns {string} Linkified string
 */
export function linkifyString(str: string, options = linkifyMatrixOptions): string {
    const linkified = _linkifyString(str, options);
    return linkified;
}
|
|
|
|
|
|
|
|
/**
 * Turns the links found inside a DOM element into anchors. Thin wrapper
 * around 'linkifyjs/element'.
 *
 * @param {object} element DOM element to linkify
 * @param {object} [options] Options for linkifyElement. Default: linkifyMatrixOptions
 * @returns {object} whatever the underlying linkify call returns
 */
export function linkifyElement(element: HTMLElement, options = linkifyMatrixOptions): HTMLElement {
    const linkified = _linkifyElement(element, options);
    return linkified;
}
|
|
|
|
|
|
|
|
/**
 * Linkifies the given string first and then sanitises the resulting HTML
 * with the standard sanitiser parameters.
 *
 * @param {string} dirtyHtml The HTML string to sanitize and linkify
 * @param {object} [options] Options for linkifyString. Default: linkifyMatrixOptions
 * @returns {string} the linkified, sanitised HTML
 */
export function linkifyAndSanitizeHtml(dirtyHtml: string, options = linkifyMatrixOptions): string {
    const linkified = linkifyString(dirtyHtml, options);
    return sanitizeHtml(linkified, sanitizeHtmlParams);
}
|
2019-07-23 09:12:24 +02:00
|
|
|
|
|
|
|
// Node names (as reported by Node.nodeName, i.e. upper case) that are always
// treated as block elements. DIV is handled separately.
const BLOCK_LEVEL_NODE_NAMES = new Set<string>([
    "H1", "H2", "H3", "H4", "H5", "H6",
    "PRE", "BLOCKQUOTE", "P",
    "UL", "OL", "LI",
    "HR",
    "TABLE", "THEAD", "TBODY", "TR", "TH", "TD",
]);

/**
 * Tells whether a node is a block element.
 * Only html nodes that are allowed in matrix messages are considered.
 *
 * @param {Node} node
 * @returns {bool} true for block elements; a DIV counts as a block element
 *     unless it carries a data-mx-maths attribute
 */
export function checkBlockNode(node: Node): boolean {
    const { nodeName } = node;
    if (nodeName === "DIV") {
        // don't treat math nodes as block nodes for deserializing
        return !(node as HTMLElement).hasAttribute("data-mx-maths");
    }
    return BLOCK_LEVEL_NODE_NAMES.has(nodeName);
}
|