From 803e8f93e3fcb2c8da28f0e85f044341cb6f84d7 Mon Sep 17 00:00:00 2001 From: David Baker Date: Fri, 15 Sep 2017 11:43:55 +0100 Subject: [PATCH] Fix emojification of symbol characters Emojione has graphics for a lot of the symbol / dingbat characters which are within the basic multilingual plane, but the new fast-path regex was only detecting surrogate pairs, so not counting the symbols as emoji. --- src/HtmlUtils.js | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/HtmlUtils.js b/src/HtmlUtils.js index 671f9b4955..4693394ef3 100644 --- a/src/HtmlUtils.js +++ b/src/HtmlUtils.js @@ -32,7 +32,15 @@ emojione.imagePathPNG = 'emojione/png/'; // Use SVGs for emojis emojione.imageType = 'svg'; -const SIMPLE_EMOJI_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/; +// Anything outside the basic multilingual plane will be a surrogate pair +const SURROGATE_PAIR_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/; +// And there are a bunch more symbol characters that emojione has within the +// BMP, so this includes the ranges from 'letterlike symbols' to +// 'miscellaneous symbols and arrows' which should catch all of them +// (with plenty of false positives, but that's OK) +const SYMBOL_PATTERN = /([\u2100-\u2bff])/; + +// And this is emojione's complete regex const EMOJI_REGEX = new RegExp(emojione.unicodeRegexp+"+", "gi"); const COLOR_REGEX = /^#[0-9a-fA-F]{6}$/; @@ -44,7 +52,7 @@ const COLOR_REGEX = /^#[0-9a-fA-F]{6}$/; * unicodeToImage uses this function. */ export function containsEmoji(str) { - return SIMPLE_EMOJI_PATTERN.test(str); + return SURROGATE_PAIR_PATTERN.test(str) || SYMBOL_PATTERN.test(str); } /* modified from https://github.com/Ranks/emojione/blob/master/lib/js/emojione.js