Merge pull request #6347 from robintown/emojibase-updates

pull/21833/head
Michael Telatynski 2021-07-20 12:57:00 +01:00 committed by GitHub
commit d8c4ab53e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 67 additions and 87 deletions

View File

@ -65,8 +65,8 @@
"counterpart": "^0.18.6", "counterpart": "^0.18.6",
"diff-dom": "^4.2.2", "diff-dom": "^4.2.2",
"diff-match-patch": "^1.0.5", "diff-match-patch": "^1.0.5",
"emojibase-data": "^5.1.1", "emojibase-data": "^6.2.0",
"emojibase-regex": "^4.1.1", "emojibase-regex": "^5.1.3",
"escape-html": "^1.0.3", "escape-html": "^1.0.3",
"file-saver": "^2.0.5", "file-saver": "^2.0.5",
"filesize": "6.1.0", "filesize": "6.1.0",

View File

@ -33,7 +33,7 @@ import { IExtendedSanitizeOptions } from './@types/sanitize-html';
import linkifyMatrix from './linkify-matrix'; import linkifyMatrix from './linkify-matrix';
import SettingsStore from './settings/SettingsStore'; import SettingsStore from './settings/SettingsStore';
import { tryTransformPermalinkToLocalHref } from "./utils/permalinks/Permalinks"; import { tryTransformPermalinkToLocalHref } from "./utils/permalinks/Permalinks";
import { SHORTCODE_TO_EMOJI, getEmojiFromUnicode } from "./emoji"; import { getEmojiFromUnicode } from "./emoji";
import ReplyThread from "./components/views/elements/ReplyThread"; import ReplyThread from "./components/views/elements/ReplyThread";
import { mediaFromMxc } from "./customisations/Media"; import { mediaFromMxc } from "./customisations/Media";
@ -79,20 +79,8 @@ function mightContainEmoji(str: string): boolean {
* @return {String} The shortcode (such as :thumbup:) * @return {String} The shortcode (such as :thumbup:)
*/ */
export function unicodeToShortcode(char: string): string { export function unicodeToShortcode(char: string): string {
const data = getEmojiFromUnicode(char); const shortcodes = getEmojiFromUnicode(char).shortcodes;
return (data && data.shortcodes ? `:${data.shortcodes[0]}:` : ''); return shortcodes.length > 0 ? `:${shortcodes[0]}:` : '';
}
/**
* Returns the unicode character for an emoji shortcode
*
* @param {String} shortcode The shortcode (such as :thumbup:)
* @return {String} The emoji character; null if none exists
*/
export function shortcodeToUnicode(shortcode: string): string {
shortcode = shortcode.slice(1, shortcode.length - 1);
const data = SHORTCODE_TO_EMOJI.get(shortcode);
return data ? data.unicode : null;
} }
export function processHtmlForSending(html: string): string { export function processHtmlForSending(html: string): string {

View File

@ -25,7 +25,6 @@ import { PillCompletion } from './Components';
import { ICompletion, ISelectionRange } from './Autocompleter'; import { ICompletion, ISelectionRange } from './Autocompleter';
import { uniq, sortBy } from 'lodash'; import { uniq, sortBy } from 'lodash';
import SettingsStore from "../settings/SettingsStore"; import SettingsStore from "../settings/SettingsStore";
import { shortcodeToUnicode } from '../HtmlUtils';
import { EMOJI, IEmoji } from '../emoji'; import { EMOJI, IEmoji } from '../emoji';
import EMOTICON_REGEX from 'emojibase-regex/emoticon'; import EMOTICON_REGEX from 'emojibase-regex/emoticon';
@ -36,20 +35,18 @@ const LIMIT = 20;
// anchored to only match from the start of parts otherwise it'll show emoji suggestions whilst typing matrix IDs // anchored to only match from the start of parts otherwise it'll show emoji suggestions whilst typing matrix IDs
const EMOJI_REGEX = new RegExp('(' + EMOTICON_REGEX.source + '|(?:^|\\s):[+-\\w]*:?)$', 'g'); const EMOJI_REGEX = new RegExp('(' + EMOTICON_REGEX.source + '|(?:^|\\s):[+-\\w]*:?)$', 'g');
interface IEmojiShort { interface ISortedEmoji {
emoji: IEmoji; emoji: IEmoji;
shortname: string;
_orderBy: number; _orderBy: number;
} }
const EMOJI_SHORTNAMES: IEmojiShort[] = EMOJI.sort((a, b) => { const SORTED_EMOJI: ISortedEmoji[] = EMOJI.sort((a, b) => {
if (a.group === b.group) { if (a.group === b.group) {
return a.order - b.order; return a.order - b.order;
} }
return a.group - b.group; return a.group - b.group;
}).map((emoji, index) => ({ }).map((emoji, index) => ({
emoji, emoji,
shortname: `:${emoji.shortcodes[0]}:`,
// Include the index so that we can preserve the original order // Include the index so that we can preserve the original order
_orderBy: index, _orderBy: index,
})); }));
@ -64,20 +61,18 @@ function score(query, space) {
} }
export default class EmojiProvider extends AutocompleteProvider { export default class EmojiProvider extends AutocompleteProvider {
matcher: QueryMatcher<IEmojiShort>; matcher: QueryMatcher<ISortedEmoji>;
nameMatcher: QueryMatcher<IEmojiShort>; nameMatcher: QueryMatcher<ISortedEmoji>;
constructor() { constructor() {
super(EMOJI_REGEX); super(EMOJI_REGEX);
this.matcher = new QueryMatcher<IEmojiShort>(EMOJI_SHORTNAMES, { this.matcher = new QueryMatcher<ISortedEmoji>(SORTED_EMOJI, {
keys: ['emoji.emoticon', 'shortname'], keys: ['emoji.emoticon'],
funcs: [ funcs: [o => o.emoji.shortcodes.map(s => `:${s}:`)],
(o) => o.emoji.shortcodes.length > 1 ? o.emoji.shortcodes.slice(1).map(s => `:${s}:`).join(" ") : "", // aliases
],
// For matching against ascii equivalents // For matching against ascii equivalents
shouldMatchWordsOnly: false, shouldMatchWordsOnly: false,
}); });
this.nameMatcher = new QueryMatcher(EMOJI_SHORTNAMES, { this.nameMatcher = new QueryMatcher(SORTED_EMOJI, {
keys: ['emoji.annotation'], keys: ['emoji.annotation'],
// For removing punctuation // For removing punctuation
shouldMatchWordsOnly: true, shouldMatchWordsOnly: true,
@ -105,34 +100,33 @@ export default class EmojiProvider extends AutocompleteProvider {
const sorters = []; const sorters = [];
// make sure that emoticons come first // make sure that emoticons come first
sorters.push((c) => score(matchedString, c.emoji.emoticon || "")); sorters.push(c => score(matchedString, c.emoji.emoticon || ""));
// then sort by score (Infinity if matchedString not in shortname) // then sort by score (Infinity if matchedString not in shortcode)
sorters.push((c) => score(matchedString, c.shortname)); sorters.push(c => score(matchedString, c.emoji.shortcodes[0]));
// then sort by the best (lowest) score across all shortcodes, trimming off the leading `:` // then sort by the best (lowest) score across all shortcodes, trimming off the leading `:`
sorters.push((c) => Math.min(...c.emoji.shortcodes.map(s => score(matchedString.substring(1), s)))); sorters.push(c => Math.min(
// If the matchedString is not empty, sort by length of shortname. Example: ...c.emoji.shortcodes.map(s => score(matchedString.substring(1), s)),
));
// If the matchedString is not empty, sort by length of shortcode. Example:
// matchedString = ":bookmark" // matchedString = ":bookmark"
// completions = [":bookmark:", ":bookmark_tabs:", ...] // completions = [":bookmark:", ":bookmark_tabs:", ...]
if (matchedString.length > 1) { if (matchedString.length > 1) {
sorters.push((c) => c.shortname.length); sorters.push(c => c.emoji.shortcodes[0].length);
} }
// Finally, sort by original ordering // Finally, sort by original ordering
sorters.push((c) => c._orderBy); sorters.push(c => c._orderBy);
completions = sortBy(uniq(completions), sorters); completions = sortBy(uniq(completions), sorters);
completions = completions.map(({ shortname }) => { completions = completions.map(c => ({
const unicode = shortcodeToUnicode(shortname); completion: c.emoji.unicode,
return { component: (
completion: unicode, <PillCompletion title={`:${c.emoji.shortcodes[0]}:`} aria-label={c.emoji.unicode}>
component: ( <span>{ c.emoji.unicode }</span>
<PillCompletion title={shortname} aria-label={unicode}> </PillCompletion>
<span>{ unicode }</span> ),
</PillCompletion> range,
), })).slice(0, LIMIT);
range,
};
}).slice(0, LIMIT);
} }
return completions; return completions;
} }

View File

@ -32,6 +32,8 @@ export const CATEGORY_HEADER_HEIGHT = 22;
export const EMOJI_HEIGHT = 37; export const EMOJI_HEIGHT = 37;
export const EMOJIS_PER_ROW = 8; export const EMOJIS_PER_ROW = 8;
const ZERO_WIDTH_JOINER = "\u200D";
interface IProps { interface IProps {
selectedEmojis?: Set<string>; selectedEmojis?: Set<string>;
showQuickReactions?: boolean; showQuickReactions?: boolean;
@ -180,7 +182,7 @@ class EmojiPicker extends React.Component<IProps, IState> {
} else { } else {
emojis = cat.id === "recent" ? this.recentlyUsed : DATA_BY_CATEGORY[cat.id]; emojis = cat.id === "recent" ? this.recentlyUsed : DATA_BY_CATEGORY[cat.id];
} }
emojis = emojis.filter(emoji => emoji.filterString.includes(filter)); emojis = emojis.filter(emoji => this.emojiMatchesFilter(emoji, filter));
this.memoizedDataByCategory[cat.id] = emojis; this.memoizedDataByCategory[cat.id] = emojis;
cat.enabled = emojis.length > 0; cat.enabled = emojis.length > 0;
// The setState below doesn't re-render the header and we already have the refs for updateVisibility, so... // The setState below doesn't re-render the header and we already have the refs for updateVisibility, so...
@ -192,6 +194,10 @@ class EmojiPicker extends React.Component<IProps, IState> {
setTimeout(this.updateVisibility, 0); setTimeout(this.updateVisibility, 0);
}; };
private emojiMatchesFilter = (emoji: IEmoji, filter: string): boolean =>
[emoji.annotation, ...emoji.shortcodes, emoji.emoticon, ...emoji.unicode.split(ZERO_WIDTH_JOINER)]
.some(x => x?.includes(filter));
private onEnterFilter = () => { private onEnterFilter = () => {
const btn = this.bodyRef.current.querySelector<HTMLButtonElement>(".mx_EmojiPicker_item"); const btn = this.bodyRef.current.querySelector<HTMLButtonElement>(".mx_EmojiPicker_item");
if (btn) { if (btn) {

View File

@ -27,11 +27,7 @@ interface IProps {
@replaceableComponent("views.emojipicker.Preview") @replaceableComponent("views.emojipicker.Preview")
class Preview extends React.PureComponent<IProps> { class Preview extends React.PureComponent<IProps> {
render() { render() {
const { const { unicode, annotation, shortcodes: [shortcode] } = this.props.emoji;
unicode = "",
annotation = "",
shortcodes: [shortcode = ""],
} = this.props.emoji || {};
return ( return (
<div className="mx_EmojiPicker_footer mx_EmojiPicker_preview"> <div className="mx_EmojiPicker_footer mx_EmojiPicker_preview">

View File

@ -15,26 +15,23 @@ limitations under the License.
*/ */
import EMOJIBASE from 'emojibase-data/en/compact.json'; import EMOJIBASE from 'emojibase-data/en/compact.json';
import SHORTCODES from 'emojibase-data/en/shortcodes/iamcal.json';
export interface IEmoji { export interface IEmoji {
annotation: string; annotation: string;
group: number; group?: number;
hexcode: string; hexcode: string;
order: number; order?: number;
shortcodes: string[]; shortcodes: string[];
tags: string[]; tags?: string[];
unicode: string; unicode: string;
skins?: Omit<IEmoji, "shortcodes" | "tags">[]; // Currently unused
emoticon?: string; emoticon?: string;
} }
interface IEmojiWithFilterString extends IEmoji {
filterString?: string;
}
// The unicode is stored without the variant selector // The unicode is stored without the variant selector
const UNICODE_TO_EMOJI = new Map<string, IEmojiWithFilterString>(); // not exported as gets for it are handled by getEmojiFromUnicode const UNICODE_TO_EMOJI = new Map<string, IEmoji>(); // not exported as gets for it are handled by getEmojiFromUnicode
export const EMOTICON_TO_EMOJI = new Map<string, IEmojiWithFilterString>(); export const EMOTICON_TO_EMOJI = new Map<string, IEmoji>();
export const SHORTCODE_TO_EMOJI = new Map<string, IEmojiWithFilterString>();
export const getEmojiFromUnicode = unicode => UNICODE_TO_EMOJI.get(stripVariation(unicode)); export const getEmojiFromUnicode = unicode => UNICODE_TO_EMOJI.get(stripVariation(unicode));
@ -62,17 +59,23 @@ export const DATA_BY_CATEGORY = {
"flags": [], "flags": [],
}; };
const ZERO_WIDTH_JOINER = "\u200D";
// Store various mappings from unicode/emoticon/shortcode to the Emoji objects // Store various mappings from unicode/emoticon/shortcode to the Emoji objects
EMOJIBASE.forEach((emoji: IEmojiWithFilterString) => { export const EMOJI: IEmoji[] = EMOJIBASE.map((emojiData: Omit<IEmoji, "shortcodes">) => {
// If there's ever a gap in shortcode coverage, we fudge it by
// filling it in with the emoji's CLDR annotation
const shortcodeData = SHORTCODES[emojiData.hexcode] ??
[emojiData.annotation.toLowerCase().replace(/ /g, "_")];
const emoji: IEmoji = {
...emojiData,
// Homogenize shortcodes by ensuring that everything is an array
shortcodes: typeof shortcodeData === "string" ? [shortcodeData] : shortcodeData,
};
const categoryId = EMOJIBASE_GROUP_ID_TO_CATEGORY[emoji.group]; const categoryId = EMOJIBASE_GROUP_ID_TO_CATEGORY[emoji.group];
if (DATA_BY_CATEGORY.hasOwnProperty(categoryId)) { if (DATA_BY_CATEGORY.hasOwnProperty(categoryId)) {
DATA_BY_CATEGORY[categoryId].push(emoji); DATA_BY_CATEGORY[categoryId].push(emoji);
} }
// This is used as the string to match the query against when filtering emojis
emoji.filterString = (`${emoji.annotation}\n${emoji.shortcodes.join('\n')}}\n${emoji.emoticon || ''}\n` +
`${emoji.unicode.split(ZERO_WIDTH_JOINER).join("\n")}`).toLowerCase();
// Add mapping from unicode to Emoji object // Add mapping from unicode to Emoji object
// The 'unicode' field that we use in emojibase has either // The 'unicode' field that we use in emojibase has either
@ -88,12 +91,7 @@ EMOJIBASE.forEach((emoji: IEmojiWithFilterString) => {
EMOTICON_TO_EMOJI.set(emoji.emoticon, emoji); EMOTICON_TO_EMOJI.set(emoji.emoticon, emoji);
} }
if (emoji.shortcodes) { return emoji;
// Add mapping from each shortcode to Emoji object
emoji.shortcodes.forEach(shortcode => {
SHORTCODE_TO_EMOJI.set(shortcode, emoji);
});
}
}); });
/** /**
@ -107,5 +105,3 @@ EMOJIBASE.forEach((emoji: IEmojiWithFilterString) => {
function stripVariation(str) { function stripVariation(str) {
return str.replace(/[\uFE00-\uFE0F]$/, ""); return str.replace(/[\uFE00-\uFE0F]$/, "");
} }
export const EMOJI: IEmoji[] = EMOJIBASE;

View File

@ -3027,15 +3027,15 @@ emoji-regex@^8.0.0:
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37" resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37"
integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A== integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==
emojibase-data@^5.1.1: emojibase-data@^6.2.0:
version "5.1.1" version "6.2.0"
resolved "https://registry.yarnpkg.com/emojibase-data/-/emojibase-data-5.1.1.tgz#0a0d63dd07ce1376b3d27642f28cafa46f651de6" resolved "https://registry.yarnpkg.com/emojibase-data/-/emojibase-data-6.2.0.tgz#db6c75c36905284fa623f4aa5f468d2be6ed364a"
integrity sha512-za/ma5SfogHjwUmGFnDbTvSfm8GGFvFaPS27GPti16YZSp5EPgz+UDsZCATXvJGit+oRNBbG/FtybXHKi2UQgQ== integrity sha512-SWKaXD2QeQs06IE7qfJftsI5924Dqzp+V9xaa5RzZIEWhmlrG6Jt2iKwfgOPHu+5S8MEtOI7GdpKsXj46chXOw==
emojibase-regex@^4.1.1: emojibase-regex@^5.1.3:
version "4.1.1" version "5.1.3"
resolved "https://registry.yarnpkg.com/emojibase-regex/-/emojibase-regex-4.1.1.tgz#6e781aca520281600fe7a177f1582c33cf1fc545" resolved "https://registry.yarnpkg.com/emojibase-regex/-/emojibase-regex-5.1.3.tgz#f0ef621ed6ec624becd2326f999fd4ea01b94554"
integrity sha512-KSigB1zQkNKFygLZ5bAfHs87LJa1ni8QTQtq8lc53Y74NF3Dk2r7kfa8MpooTO8JBb5Xz660X4tSjDB+I+7elA== integrity sha512-gT8T9LxLA8VJdI+8KQtyykB9qKzd7WuUL3M2yw6y9tplFeufOUANg3UKVaKUvkMcRNvZsSElWhxcJrx8WPE12g==
encoding@^0.1.11: encoding@^0.1.11:
version "0.1.13" version "0.1.13"