Strip all variation selectors on emoji

...when inserting into or looking up in the unicode to emoji map.

This broke with emojibase 4.2.0 which changed the type of a whole
load of emojis to 'text' when previously they were 'emoji'. This
caused them to get the 'text' variant of the unicode string which
has the text variation selector (15) appended instead of the emoji
variation selector (16). We were only stripping the emoji selector,
so upgrading to 4.2.0 caused riot to fail to find the heart in the
unicode map, which therefore prevented the app from starting.
pull/21833/head
David Baker 2020-01-07 18:48:55 +00:00
parent 0df16631c2
commit f31cc7dfee
1 changed files with 28 additions and 11 deletions

View File

@ -16,14 +16,12 @@ limitations under the License.
import EMOJIBASE from 'emojibase-data/en/compact.json';
export const VARIATION_SELECTOR = String.fromCharCode(0xFE0F);
// The unicode is stored without the variant selector
const UNICODE_TO_EMOJI = new Map(); // not exported as gets for it are handled by getEmojiFromUnicode
export const EMOTICON_TO_EMOJI = new Map();
export const SHORTCODE_TO_EMOJI = new Map();
export const getEmojiFromUnicode = unicode => UNICODE_TO_EMOJI.get(unicode.replace(VARIATION_SELECTOR, ""));
export const getEmojiFromUnicode = unicode => UNICODE_TO_EMOJI.get(stripVariation(unicode));
const EMOJIBASE_GROUP_ID_TO_CATEGORY = [
"people", // smileys
@ -51,13 +49,6 @@ export const DATA_BY_CATEGORY = {
// Store various mappings from unicode/emoticon/shortcode to the Emoji objects
EMOJIBASE.forEach(emoji => {
if (emoji.unicode.includes(VARIATION_SELECTOR)) {
// Clone data into variation-less version
emoji = Object.assign({}, emoji, {
unicode: emoji.unicode.replace(VARIATION_SELECTOR, ""),
});
}
const categoryId = EMOJIBASE_GROUP_ID_TO_CATEGORY[emoji.group];
if (DATA_BY_CATEGORY.hasOwnProperty(categoryId)) {
DATA_BY_CATEGORY[categoryId].push(emoji);
@ -66,7 +57,13 @@ EMOJIBASE.forEach(emoji => {
emoji.filterString = `${emoji.annotation}\n${emoji.shortcodes.join('\n')}}\n${emoji.emoticon || ''}`.toLowerCase();
// Add mapping from unicode to Emoji object
UNICODE_TO_EMOJI.set(emoji.unicode, emoji);
// The 'unicode' field that we use in emojibase has either
// VS15 or VS16 appended to any characters that can take
// variation selectors. Which one it appends depends
// on whether emojibase considers their type to be 'text' or
// 'emoji'. We therefore strip any variation chars from strings
// both when building the map and when looking up.
UNICODE_TO_EMOJI.set(stripVariation(emoji.unicode), emoji);
if (emoji.emoticon) {
// Add mapping from emoticon to Emoji object
@ -80,3 +77,23 @@ EMOJIBASE.forEach(emoji => {
});
}
});
/**
 * Strips variation selectors from a string.
 *
 * Removes every UTF-16 code unit in the range U+FE00..U+FE0F (VS1..VS16),
 * which covers both the text presentation selector (VS15, U+FE0E) and the
 * emoji presentation selector (VS16, U+FE0F). All other characters are
 * returned unchanged and in their original order.
 *
 * NB. Skin tone modifiers are not variation selectors:
 * this function does not touch them. (Should it?)
 * The supplementary variation selectors (U+E0100..U+E01EF) are likewise
 * left in place.
 *
 * @param {string} str string to strip
 * @returns {string} stripped string
 */
// Kept as a function declaration (not a const arrow) so that it is hoisted
// and can be called by module-level code that appears earlier in the file.
function stripVariation(str) {
    // The range lies outside the surrogate area (U+D800..U+DFFF), so matching
    // individual code units can never split an astral character in half.
    return str.replace(/[\uFE00-\uFE0F]/g, '');
}