From 86a95cfff7c1ca727f1d21019f5dac35351a1673 Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Wed, 26 Jun 2024 10:34:07 +0100 Subject: [PATCH] Switch from graphemer to Intl.Segmenter (#12697) Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> --- package.json | 1 - src/HtmlUtils.tsx | 11 +++++------ src/editor/parts.ts | 12 +++++------- src/utils/strings.ts | 8 ++++---- tsconfig.json | 2 +- 5 files changed, 15 insertions(+), 19 deletions(-) diff --git a/package.json b/package.json index 45c955aa92..1c963e8bd6 100644 --- a/package.json +++ b/package.json @@ -96,7 +96,6 @@ "filesize": "10.1.2", "github-markdown-css": "^5.5.1", "glob-to-regexp": "^0.4.1", - "graphemer": "^1.4.0", "highlight.js": "^11.3.1", "html-entities": "^2.0.0", "is-ip": "^3.1.0", diff --git a/src/HtmlUtils.tsx b/src/HtmlUtils.tsx index 655be4ac92..d8c154440b 100644 --- a/src/HtmlUtils.tsx +++ b/src/HtmlUtils.tsx @@ -26,7 +26,6 @@ import { decode } from "html-entities"; import { IContent } from "matrix-js-sdk/src/matrix"; import { Optional } from "matrix-events-sdk"; import escapeHtml from "escape-html"; -import GraphemeSplitter from "graphemer"; import { getEmojiFromUnicode } from "@matrix-org/emojibase-bindings"; import { IExtendedSanitizeOptions } from "./@types/sanitize-html"; @@ -34,6 +33,7 @@ import SettingsStore from "./settings/SettingsStore"; import { stripHTMLReply, stripPlainReply } from "./utils/Reply"; import { PERMITTED_URL_SCHEMES } from "./utils/UrlUtils"; import { sanitizeHtmlParams, transformTags } from "./Linkify"; +import { graphemeSegmenter } from "./utils/strings"; export { Linkify, linkifyElement, linkifyAndSanitizeHtml } from "./Linkify"; @@ -265,17 +265,16 @@ export function formatEmojis(message: string | undefined, isHtmlMessage?: boolea let text = ""; let key = 0; - const splitter = new GraphemeSplitter(); - for (const char of splitter.iterateGraphemes(message)) { - if (EMOJIBASE_REGEX.test(char)) { + for (const data of 
graphemeSegmenter.segment(message)) { + if (EMOJIBASE_REGEX.test(data.segment)) { if (text) { result.push(text); text = ""; } - result.push(emojiToSpan(char, key)); + result.push(emojiToSpan(data.segment, key)); key++; } else { - text += char; + text += data.segment; } } if (text) { diff --git a/src/editor/parts.ts b/src/editor/parts.ts index 12262280ae..3f482357d1 100644 --- a/src/editor/parts.ts +++ b/src/editor/parts.ts @@ -17,7 +17,6 @@ limitations under the License. import EMOJIBASE_REGEX from "emojibase-regex"; import { MatrixClient, RoomMember, Room } from "matrix-js-sdk/src/matrix"; -import GraphemeSplitter from "graphemer"; import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete"; import { unicodeToShortcode } from "../HtmlUtils"; @@ -25,7 +24,7 @@ import * as Avatar from "../Avatar"; import defaultDispatcher from "../dispatcher/dispatcher"; import { Action } from "../dispatcher/actions"; import SettingsStore from "../settings/SettingsStore"; -import { getFirstGrapheme } from "../utils/strings"; +import { getFirstGrapheme, graphemeSegmenter } from "../utils/strings"; const REGIONAL_EMOJI_SEPARATOR = String.fromCodePoint(0x200b); @@ -650,19 +649,18 @@ export class PartCreator { const parts: (PlainPart | EmojiPart)[] = []; let plainText = ""; - const splitter = new GraphemeSplitter(); - for (const char of splitter.iterateGraphemes(text)) { - if (EMOJIBASE_REGEX.test(char)) { + for (const data of graphemeSegmenter.segment(text)) { + if (EMOJIBASE_REGEX.test(data.segment)) { if (plainText) { parts.push(this.plain(plainText)); plainText = ""; } - parts.push(this.emoji(char)); + parts.push(this.emoji(data.segment)); if (PartCreator.isRegionalIndicator(text)) { parts.push(this.plain(REGIONAL_EMOJI_SEPARATOR)); } } else { - plainText += char; + plainText += data.segment; } } if (plainText) { diff --git a/src/utils/strings.ts b/src/utils/strings.ts index 53039cdc8f..8f7446e491 100644 --- 
a/src/utils/strings.ts +++ b/src/utils/strings.ts @@ -21,7 +21,6 @@ limitations under the License. * @param text the plaintext to put in the user's clipboard */ import { logger } from "matrix-js-sdk/src/logger"; -import GraphemeSplitter from "graphemer"; export async function copyPlaintext(text: string): Promise<boolean> { try { @@ -85,6 +84,8 @@ export function getSelectedText(): string { return window.getSelection()!.toString(); } +export const graphemeSegmenter = new Intl.Segmenter(); + /** * Returns the first grapheme in the given string, * especially useful for strings containing emoji, will not break compound emoji up. @@ -92,7 +93,6 @@ export function getSelectedText(): string { * @returns the first grapheme or an empty string if given an empty string */ export function getFirstGrapheme(str: string): string { - const splitter = new GraphemeSplitter(); - const result = splitter.iterateGraphemes(str).next(); - return result.done ? "" : result.value; + const result = graphemeSegmenter.segment(str)[Symbol.iterator]().next(); + return result.done ? "" : result.value.segment; } diff --git a/tsconfig.json b/tsconfig.json index 382874c24f..3118f598c4 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -12,7 +12,7 @@ "outDir": "./lib", "declaration": true, "jsx": "react", - "lib": ["es2021", "dom", "dom.iterable"], + "lib": ["es2022", "dom", "dom.iterable"], "strict": true }, "include": [