Merge pull request #6347 from robintown/emojibase-updates

pull/21833/head
Michael Telatynski 2021-07-20 12:57:00 +01:00 committed by GitHub
commit d8c4ab53e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 67 additions and 87 deletions

View File

@ -65,8 +65,8 @@
"counterpart": "^0.18.6",
"diff-dom": "^4.2.2",
"diff-match-patch": "^1.0.5",
"emojibase-data": "^5.1.1",
"emojibase-regex": "^4.1.1",
"emojibase-data": "^6.2.0",
"emojibase-regex": "^5.1.3",
"escape-html": "^1.0.3",
"file-saver": "^2.0.5",
"filesize": "6.1.0",

View File

@ -33,7 +33,7 @@ import { IExtendedSanitizeOptions } from './@types/sanitize-html';
import linkifyMatrix from './linkify-matrix';
import SettingsStore from './settings/SettingsStore';
import { tryTransformPermalinkToLocalHref } from "./utils/permalinks/Permalinks";
import { SHORTCODE_TO_EMOJI, getEmojiFromUnicode } from "./emoji";
import { getEmojiFromUnicode } from "./emoji";
import ReplyThread from "./components/views/elements/ReplyThread";
import { mediaFromMxc } from "./customisations/Media";
@ -79,20 +79,8 @@ function mightContainEmoji(str: string): boolean {
* @return {String} The shortcode (such as :thumbup:)
*/
export function unicodeToShortcode(char: string): string {
    // The lookup yields undefined for characters with no emoji entry,
    // so every access below is optional.
    const shortcodes = getEmojiFromUnicode(char)?.shortcodes;
    // An empty shortcode list must produce '' rather than ':undefined:'.
    return shortcodes?.length ? `:${shortcodes[0]}:` : '';
}
/**
* Returns the unicode character for an emoji shortcode
*
* @param {String} shortcode The shortcode (such as :thumbup:)
* @return {String} The emoji character; null if none exists
*/
export function shortcodeToUnicode(shortcode: string): string {
shortcode = shortcode.slice(1, shortcode.length - 1);
const data = SHORTCODE_TO_EMOJI.get(shortcode);
return data ? data.unicode : null;
const shortcodes = getEmojiFromUnicode(char).shortcodes;
return shortcodes.length > 0 ? `:${shortcodes[0]}:` : '';
}
export function processHtmlForSending(html: string): string {

View File

@ -25,7 +25,6 @@ import { PillCompletion } from './Components';
import { ICompletion, ISelectionRange } from './Autocompleter';
import { uniq, sortBy } from 'lodash';
import SettingsStore from "../settings/SettingsStore";
import { shortcodeToUnicode } from '../HtmlUtils';
import { EMOJI, IEmoji } from '../emoji';
import EMOTICON_REGEX from 'emojibase-regex/emoticon';
@ -36,20 +35,18 @@ const LIMIT = 20;
// anchored to only match from the start of parts otherwise it'll show emoji suggestions whilst typing matrix IDs
const EMOJI_REGEX = new RegExp('(' + EMOTICON_REGEX.source + '|(?:^|\\s):[+-\\w]*:?)$', 'g');
interface IEmojiShort {
interface ISortedEmoji {
emoji: IEmoji;
shortname: string;
_orderBy: number;
}
const EMOJI_SHORTNAMES: IEmojiShort[] = EMOJI.sort((a, b) => {
const SORTED_EMOJI: ISortedEmoji[] = EMOJI.sort((a, b) => {
if (a.group === b.group) {
return a.order - b.order;
}
return a.group - b.group;
}).map((emoji, index) => ({
emoji,
shortname: `:${emoji.shortcodes[0]}:`,
// Include the index so that we can preserve the original order
_orderBy: index,
}));
@ -64,20 +61,18 @@ function score(query, space) {
}
export default class EmojiProvider extends AutocompleteProvider {
matcher: QueryMatcher<IEmojiShort>;
nameMatcher: QueryMatcher<IEmojiShort>;
matcher: QueryMatcher<ISortedEmoji>;
nameMatcher: QueryMatcher<ISortedEmoji>;
constructor() {
super(EMOJI_REGEX);
this.matcher = new QueryMatcher<IEmojiShort>(EMOJI_SHORTNAMES, {
keys: ['emoji.emoticon', 'shortname'],
funcs: [
(o) => o.emoji.shortcodes.length > 1 ? o.emoji.shortcodes.slice(1).map(s => `:${s}:`).join(" ") : "", // aliases
],
this.matcher = new QueryMatcher<ISortedEmoji>(SORTED_EMOJI, {
keys: ['emoji.emoticon'],
funcs: [o => o.emoji.shortcodes.map(s => `:${s}:`)],
// For matching against ascii equivalents
shouldMatchWordsOnly: false,
});
this.nameMatcher = new QueryMatcher(EMOJI_SHORTNAMES, {
this.nameMatcher = new QueryMatcher(SORTED_EMOJI, {
keys: ['emoji.annotation'],
// For removing punctuation
shouldMatchWordsOnly: true,
@ -105,34 +100,33 @@ export default class EmojiProvider extends AutocompleteProvider {
const sorters = [];
// make sure that emoticons come first
sorters.push((c) => score(matchedString, c.emoji.emoticon || ""));
sorters.push(c => score(matchedString, c.emoji.emoticon || ""));
// then sort by score (Infinity if matchedString not in shortname)
sorters.push((c) => score(matchedString, c.shortname));
// then sort by score (Infinity if matchedString not in shortcode)
sorters.push(c => score(matchedString, c.emoji.shortcodes[0]));
// then sort by the minimum score across all shortcodes, trimming the leading `:` off the query
sorters.push((c) => Math.min(...c.emoji.shortcodes.map(s => score(matchedString.substring(1), s))));
// If the matchedString is not empty, sort by length of shortname. Example:
sorters.push(c => Math.min(
...c.emoji.shortcodes.map(s => score(matchedString.substring(1), s)),
));
// If the matchedString is not empty, sort by length of shortcode. Example:
// matchedString = ":bookmark"
// completions = [":bookmark:", ":bookmark_tabs:", ...]
if (matchedString.length > 1) {
sorters.push((c) => c.shortname.length);
sorters.push(c => c.emoji.shortcodes[0].length);
}
// Finally, sort by original ordering
sorters.push((c) => c._orderBy);
sorters.push(c => c._orderBy);
completions = sortBy(uniq(completions), sorters);
completions = completions.map(({ shortname }) => {
const unicode = shortcodeToUnicode(shortname);
return {
completion: unicode,
component: (
<PillCompletion title={shortname} aria-label={unicode}>
<span>{ unicode }</span>
</PillCompletion>
),
range,
};
}).slice(0, LIMIT);
completions = completions.map(c => ({
completion: c.emoji.unicode,
component: (
<PillCompletion title={`:${c.emoji.shortcodes[0]}:`} aria-label={c.emoji.unicode}>
<span>{ c.emoji.unicode }</span>
</PillCompletion>
),
range,
})).slice(0, LIMIT);
}
return completions;
}

View File

@ -32,6 +32,8 @@ export const CATEGORY_HEADER_HEIGHT = 22;
export const EMOJI_HEIGHT = 37;
export const EMOJIS_PER_ROW = 8;
const ZERO_WIDTH_JOINER = "\u200D";
interface IProps {
selectedEmojis?: Set<string>;
showQuickReactions?: boolean;
@ -180,7 +182,7 @@ class EmojiPicker extends React.Component<IProps, IState> {
} else {
emojis = cat.id === "recent" ? this.recentlyUsed : DATA_BY_CATEGORY[cat.id];
}
emojis = emojis.filter(emoji => emoji.filterString.includes(filter));
emojis = emojis.filter(emoji => this.emojiMatchesFilter(emoji, filter));
this.memoizedDataByCategory[cat.id] = emojis;
cat.enabled = emojis.length > 0;
// The setState below doesn't re-render the header and we already have the refs for updateVisibility, so...
@ -192,6 +194,10 @@ class EmojiPicker extends React.Component<IProps, IState> {
setTimeout(this.updateVisibility, 0);
};
/**
 * Decides whether an emoji should be listed for the given search filter.
 *
 * The annotation, every shortcode, the emoticon (when present) and each
 * ZWJ-separated component of the unicode sequence are tested for a
 * case-insensitive substring match.
 *
 * @param emoji The emoji to test
 * @param filter The search text to look for
 * @return true if any of the emoji's searchable strings contains the filter
 */
private emojiMatchesFilter = (emoji: IEmoji, filter: string): boolean => {
    const needle = filter.toLowerCase();
    const candidates = [
        emoji.annotation,
        ...emoji.shortcodes,
        emoji.emoticon,
        ...emoji.unicode.split(ZERO_WIDTH_JOINER),
    ];
    // emoticon is optional on IEmoji, so missing entries are skipped rather than dereferenced
    return candidates.some(x => x?.toLowerCase().includes(needle));
};
private onEnterFilter = () => {
const btn = this.bodyRef.current.querySelector<HTMLButtonElement>(".mx_EmojiPicker_item");
if (btn) {

View File

@ -27,11 +27,7 @@ interface IProps {
@replaceableComponent("views.emojipicker.Preview")
class Preview extends React.PureComponent<IProps> {
render() {
const {
unicode = "",
annotation = "",
shortcodes: [shortcode = ""],
} = this.props.emoji || {};
const { unicode, annotation, shortcodes: [shortcode] } = this.props.emoji;
return (
<div className="mx_EmojiPicker_footer mx_EmojiPicker_preview">

View File

@ -15,26 +15,23 @@ limitations under the License.
*/
import EMOJIBASE from 'emojibase-data/en/compact.json';
import SHORTCODES from 'emojibase-data/en/shortcodes/iamcal.json';
export interface IEmoji {
annotation: string;
group: number;
group?: number;
hexcode: string;
order: number;
order?: number;
shortcodes: string[];
tags: string[];
tags?: string[];
unicode: string;
skins?: Omit<IEmoji, "shortcodes" | "tags">[]; // Currently unused
emoticon?: string;
}
interface IEmojiWithFilterString extends IEmoji {
filterString?: string;
}
// The unicode is stored without the variant selector
const UNICODE_TO_EMOJI = new Map<string, IEmojiWithFilterString>(); // not exported as gets for it are handled by getEmojiFromUnicode
export const EMOTICON_TO_EMOJI = new Map<string, IEmojiWithFilterString>();
export const SHORTCODE_TO_EMOJI = new Map<string, IEmojiWithFilterString>();
const UNICODE_TO_EMOJI = new Map<string, IEmoji>(); // not exported as gets for it are handled by getEmojiFromUnicode
export const EMOTICON_TO_EMOJI = new Map<string, IEmoji>();
export const getEmojiFromUnicode = unicode => UNICODE_TO_EMOJI.get(stripVariation(unicode));
@ -62,17 +59,23 @@ export const DATA_BY_CATEGORY = {
"flags": [],
};
const ZERO_WIDTH_JOINER = "\u200D";
// Store various mappings from unicode/emoticon/shortcode to the Emoji objects
EMOJIBASE.forEach((emoji: IEmojiWithFilterString) => {
export const EMOJI: IEmoji[] = EMOJIBASE.map((emojiData: Omit<IEmoji, "shortcodes">) => {
// If there's ever a gap in shortcode coverage, we fudge it by
// filling it in with the emoji's CLDR annotation
const shortcodeData = SHORTCODES[emojiData.hexcode] ??
[emojiData.annotation.toLowerCase().replace(/ /g, "_")];
const emoji: IEmoji = {
...emojiData,
// Homogenize shortcodes by ensuring that everything is an array
shortcodes: typeof shortcodeData === "string" ? [shortcodeData] : shortcodeData,
};
const categoryId = EMOJIBASE_GROUP_ID_TO_CATEGORY[emoji.group];
if (DATA_BY_CATEGORY.hasOwnProperty(categoryId)) {
DATA_BY_CATEGORY[categoryId].push(emoji);
}
// This is used as the string to match the query against when filtering emojis
emoji.filterString = (`${emoji.annotation}\n${emoji.shortcodes.join('\n')}}\n${emoji.emoticon || ''}\n` +
`${emoji.unicode.split(ZERO_WIDTH_JOINER).join("\n")}`).toLowerCase();
// Add mapping from unicode to Emoji object
// The 'unicode' field that we use in emojibase has either
@ -88,12 +91,7 @@ EMOJIBASE.forEach((emoji: IEmojiWithFilterString) => {
EMOTICON_TO_EMOJI.set(emoji.emoticon, emoji);
}
if (emoji.shortcodes) {
// Add mapping from each shortcode to Emoji object
emoji.shortcodes.forEach(shortcode => {
SHORTCODE_TO_EMOJI.set(shortcode, emoji);
});
}
return emoji;
});
/**
@ -107,5 +105,3 @@ EMOJIBASE.forEach((emoji: IEmojiWithFilterString) => {
function stripVariation(str) {
    // Matches a single code unit in U+FE00..U+FE0F, but only at the very end of the string
    const trailingSelector = /[\uFE00-\uFE0F]$/;
    const stripped = str.replace(trailingSelector, "");
    return stripped;
}
export const EMOJI: IEmoji[] = EMOJIBASE;

View File

@ -3027,15 +3027,15 @@ emoji-regex@^8.0.0:
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37"
integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==
emojibase-data@^5.1.1:
version "5.1.1"
resolved "https://registry.yarnpkg.com/emojibase-data/-/emojibase-data-5.1.1.tgz#0a0d63dd07ce1376b3d27642f28cafa46f651de6"
integrity sha512-za/ma5SfogHjwUmGFnDbTvSfm8GGFvFaPS27GPti16YZSp5EPgz+UDsZCATXvJGit+oRNBbG/FtybXHKi2UQgQ==
emojibase-data@^6.2.0:
version "6.2.0"
resolved "https://registry.yarnpkg.com/emojibase-data/-/emojibase-data-6.2.0.tgz#db6c75c36905284fa623f4aa5f468d2be6ed364a"
integrity sha512-SWKaXD2QeQs06IE7qfJftsI5924Dqzp+V9xaa5RzZIEWhmlrG6Jt2iKwfgOPHu+5S8MEtOI7GdpKsXj46chXOw==
emojibase-regex@^4.1.1:
version "4.1.1"
resolved "https://registry.yarnpkg.com/emojibase-regex/-/emojibase-regex-4.1.1.tgz#6e781aca520281600fe7a177f1582c33cf1fc545"
integrity sha512-KSigB1zQkNKFygLZ5bAfHs87LJa1ni8QTQtq8lc53Y74NF3Dk2r7kfa8MpooTO8JBb5Xz660X4tSjDB+I+7elA==
emojibase-regex@^5.1.3:
version "5.1.3"
resolved "https://registry.yarnpkg.com/emojibase-regex/-/emojibase-regex-5.1.3.tgz#f0ef621ed6ec624becd2326f999fd4ea01b94554"
integrity sha512-gT8T9LxLA8VJdI+8KQtyykB9qKzd7WuUL3M2yw6y9tplFeufOUANg3UKVaKUvkMcRNvZsSElWhxcJrx8WPE12g==
encoding@^0.1.11:
version "0.1.13"