riot-web/src/languageHandler.tsx

703 lines
29 KiB
TypeScript

/*
Copyright 2024 New Vector Ltd.
Copyright 2019-2022 The Matrix.org Foundation C.I.C.
Copyright 2019 Michael Telatynski <7t3chguy@gmail.com>
Copyright 2017 MTRNord and Cooperative EITA
Copyright 2017 Vector Creations Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR GPL-3.0-only
Please see LICENSE files in the repository root for full details.
*/
import counterpart from "counterpart";
import React from "react";
import { logger } from "matrix-js-sdk/src/logger";
import { Optional } from "matrix-events-sdk";
import { MapWithDefault } from "matrix-js-sdk/src/utils";
import { normalizeLanguageKey, TranslationKey as _TranslationKey, KEY_SEPARATOR } from "matrix-web-i18n";
import { TranslationStringsObject } from "@matrix-org/react-sdk-module-api";
import _ from "lodash";
import type Translations from "./i18n/strings/en_EN.json";
import SettingsStore from "./settings/SettingsStore";
import PlatformPeg from "./PlatformPeg";
import { SettingLevel } from "./settings/SettingLevel";
import { retry } from "./utils/promise";
import SdkConfig from "./SdkConfig";
import { ModuleRunner } from "./modules/ModuleRunner";
// @ts-ignore - $webapp is a webpack resolve alias pointing to the output directory, see webpack config
import webpackLangJsonUrl from "$webapp/i18n/languages.json";
export { normalizeLanguageKey, getNormalizedLanguageKeys } from "matrix-web-i18n";
const i18nFolder = "i18n/";
// Control whether to also return original, untranslated strings
// Useful for debugging and testing
const ANNOTATE_STRINGS = false;
// We use english strings as keys, some of which contain full stops
counterpart.setSeparator(KEY_SEPARATOR);
// see `translateWithFallback` for an explanation of fallback handling
const FALLBACK_LOCALE = "en";
counterpart.setFallbackLocale(FALLBACK_LOCALE);
export interface ErrorOptions {
// Because we're mixing the substitution variables and `cause` into the same object
// below, we want them to always explicitly say whether there is an underlying error
// or not to avoid typos of "cause" slipping through unnoticed.
cause: unknown | undefined;
}
/**
* Used to rethrow an error with a user-friendly translatable message while maintaining
* access to that original underlying error. Downstream consumers can display the
* `translatedMessage` property in the UI and inspect the underlying error with the
* `cause` property.
*
* The error message will display as English in the console and logs so Element
* developers can easily understand the error and find the source in the code. It also
* helps tools like Sentry deduplicate the error, or just generally searching in
* rageshakes to find all instances regardless of the users locale.
*
* @param message - The untranslated error message text, e.g "Something went wrong with %(foo)s".
* @param substitutionVariablesAndCause - Variable substitutions for the translation and
* original cause of the error. If there is no cause, just pass `undefined`, e.g { foo:
* 'bar', cause: err || undefined }
*/
export class UserFriendlyError extends Error {
public readonly translatedMessage: string;
public constructor(
message: TranslationKey,
substitutionVariablesAndCause?: Omit<IVariables, keyof ErrorOptions> | ErrorOptions,
) {
// Prevent "Could not find /%\(cause\)s/g in x" logs to the console by removing it from the list
const { cause, ...substitutionVariables } = substitutionVariablesAndCause ?? {};
const errorOptions = { cause };
// Create the error with the English version of the message that we want to show up in the logs
const englishTranslatedMessage = _t(message, { ...substitutionVariables, locale: "en" });
super(englishTranslatedMessage, errorOptions);
// Also provide a translated version of the error in the users locale to display
this.translatedMessage = _t(message, substitutionVariables);
}
}
export function getUserLanguage(): string {
const language = SettingsStore.getValue("language", null, /*excludeDefault:*/ true);
if (typeof language === "string" && language !== "") {
return language;
} else {
return normalizeLanguageKey(getLanguageFromBrowser());
}
}
/**
* A type representing the union of possible keys into the translation file using `|` delimiter to access nested fields.
* @example `common|error` to access `error` within the `common` sub-object.
* {
* "common": {
* "error": "Error"
* }
* }
*/
export type TranslationKey = _TranslationKey<typeof Translations>;
// Function which only purpose is to mark that a string is translatable
// Does not actually do anything. It's helpful for automatic extraction of translatable strings
export function _td(s: TranslationKey): TranslationKey {
return s;
}
/**
* to improve screen reader experience translations that are not in the main page language
* eg a translation that fell back to english from another language
* should be wrapped with an appropriate `lang='en'` attribute
* counterpart's `translate` doesn't expose a way to determine if the resulting translation
* is in the target locale or a fallback locale
* for this reason, force fallbackLocale === locale in the first call to translate
* and fallback 'manually' so we can mark fallback strings appropriately
* */
const translateWithFallback = (text: string, options?: IVariables): { translated: string; isFallback?: boolean } => {
const translated = counterpart.translate(text, { ...options, fallbackLocale: counterpart.getLocale() });
if (!translated || translated.startsWith("missing translation:")) {
const fallbackTranslated = counterpart.translate(text, { ...options, locale: FALLBACK_LOCALE });
if (
(!fallbackTranslated || fallbackTranslated.startsWith("missing translation:")) &&
process.env.NODE_ENV !== "development"
) {
// Even the translation via FALLBACK_LOCALE failed; this can happen if
//
// 1. The string isn't in the translations dictionary, usually because you're in develop
// and haven't run yarn i18n
// 2. Loading the translation resources over the network failed, which can happen due to
// to network or if the client tried to load a translation that's been removed from the
// server.
//
// At this point, its the lesser evil to show the untranslated text, which
// will be in English, so the user can still make out *something*, rather than an opaque
// "missing translation" error.
//
// Don't do this in develop so people remember to run yarn i18n.
return { translated: text, isFallback: true };
}
return { translated: fallbackTranslated, isFallback: true };
}
return { translated };
};
// Wrapper for counterpart's translation function so that it handles nulls and undefineds properly
// Takes the same arguments as counterpart.translate()
function safeCounterpartTranslate(text: string, variables?: IVariables): { translated: string; isFallback?: boolean } {
// Don't do substitutions in counterpart. We handle it ourselves so we can replace with React components
// However, still pass the variables to counterpart so that it can choose the correct plural if count is given
// It is enough to pass the count variable, but in the future counterpart might make use of other information too
const options: IVariables & {
interpolate: boolean;
} = { ...variables, interpolate: false };
// Horrible hack to avoid https://github.com/vector-im/element-web/issues/4191
// The interpolation library that counterpart uses does not support undefined/null
// values and instead will throw an error. This is a problem since everywhere else
// in JS land passing undefined/null will simply stringify instead, and when converting
// valid ES6 template strings to i18n strings it's extremely easy to pass undefined/null
// if there are no existing null guards. To avoid this making the app completely inoperable,
// we'll check all the values for undefined/null and stringify them here.
if (options && typeof options === "object") {
Object.keys(options).forEach((k) => {
if (options[k] === undefined) {
logger.warn("safeCounterpartTranslate called with undefined interpolation name: " + k);
options[k] = "undefined";
}
if (options[k] === null) {
logger.warn("safeCounterpartTranslate called with null interpolation name: " + k);
options[k] = "null";
}
});
}
return translateWithFallback(text, options);
}
/**
* The value a variable or tag can take for a translation interpolation.
*/
type SubstitutionValue = number | string | React.ReactNode | ((sub: string) => React.ReactNode);
export interface IVariables {
count?: number;
[key: string]: SubstitutionValue;
}
export type Tags = Record<string, SubstitutionValue>;
export type TranslatedString = string | React.ReactNode;
// For development/testing purposes it is useful to also output the original string
// Don't do that for release versions
const annotateStrings = (result: TranslatedString, translationKey: TranslationKey): TranslatedString => {
if (!ANNOTATE_STRINGS) {
return result;
}
if (typeof result === "string") {
return `@@${translationKey}##${result}@@`;
} else {
return (
<span className="translated-string" data-orig-string={translationKey}>
{result}
</span>
);
}
};
/*
* Translates text and optionally also replaces XML-ish elements in the text with e.g. React components
* @param {string} text The untranslated text, e.g "click <a>here</a> now to %(foo)s".
* @param {object} variables Variable substitutions, e.g { foo: 'bar' }
* @param {object} tags Tag substitutions e.g. { 'a': (sub) => <a>{sub}</a> }
*
* In both variables and tags, the values to substitute with can be either simple strings, React components,
* or functions that return the value to use in the substitution (e.g. return a React component). In case of
* a tag replacement, the function receives as the argument the text inside the element corresponding to the tag.
*
* Use tag substitutions if you need to translate text between tags (e.g. "<a>Click here!</a>"), otherwise
* you will end up with literal "<a>" in your output, rather than HTML. Note that you can also use variable
* substitution to insert React components, but you can't use it to translate text between tags.
*
* @return a React <span> component if any non-strings were used in substitutions, otherwise a string
*/
// eslint-next-line @typescript-eslint/naming-convention
export function _t(text: TranslationKey, variables?: IVariables): string;
export function _t(text: TranslationKey, variables: IVariables | undefined, tags: Tags): React.ReactNode;
export function _t(text: TranslationKey, variables?: IVariables, tags?: Tags): TranslatedString {
// The translation returns text so there's no XSS vector here (no unsafe HTML, no code execution)
const { translated } = safeCounterpartTranslate(text, variables);
const substituted = substitute(translated, variables, tags);
return annotateStrings(substituted, text);
}
/**
* Utility function to look up a string by its translation key without resolving variables & tags
* @param key - the translation key to return the value for
*/
export function lookupString(key: TranslationKey): string {
return safeCounterpartTranslate(key, {}).translated;
}
/*
* Wraps normal _t function and adds atttribution for translations that used a fallback locale
* Wraps translations that fell back from active locale to fallback locale with a `<span lang=<fallback locale>>`
* @param {string} text The untranslated text, e.g "click <a>here</a> now to %(foo)s".
* @param {object} variables Variable substitutions, e.g { foo: 'bar' }
* @param {object} tags Tag substitutions e.g. { 'a': (sub) => <a>{sub}</a> }
*
* @return a React <span> component if any non-strings were used in substitutions
* or translation used a fallback locale, otherwise a string
*/
// eslint-next-line @typescript-eslint/naming-convention
export function _tDom(text: TranslationKey, variables?: IVariables): TranslatedString;
export function _tDom(text: TranslationKey, variables: IVariables, tags: Tags): React.ReactNode;
export function _tDom(text: TranslationKey, variables?: IVariables, tags?: Tags): TranslatedString {
// The translation returns text so there's no XSS vector here (no unsafe HTML, no code execution)
const { translated, isFallback } = safeCounterpartTranslate(text, variables);
const substituted = substitute(translated, variables, tags);
// wrap en fallback translation with lang attribute for screen readers
const result = isFallback ? <span lang="en">{substituted}</span> : substituted;
return annotateStrings(result, text);
}
/**
* Sanitizes unsafe text for the sanitizer, ensuring references to variables will not be considered
* replaceable by the translation functions.
* @param {string} text The text to sanitize.
* @returns {string} The sanitized text.
*/
export function sanitizeForTranslation(text: string): string {
// Add a non-breaking space so the regex doesn't trigger when translating.
return text.replace(/%\(([^)]*)\)/g, "%\xa0($1)");
}
/*
* Similar to _t(), except only does substitutions, and no translation
* @param {string} text The text, e.g "click <a>here</a> now to %(foo)s".
* @param {object} variables Variable substitutions, e.g { foo: 'bar' }
* @param {object} tags Tag substitutions e.g. { 'a': (sub) => <a>{sub}</a> }
*
* The values to substitute with can be either simple strings, or functions that return the value to use in
* the substitution (e.g. return a React component). In case of a tag replacement, the function receives as
* the argument the text inside the element corresponding to the tag.
*
* @return a React <span> component if any non-strings were used in substitutions, otherwise a string
*/
export function substitute(text: string, variables?: IVariables): string;
export function substitute(text: string, variables: IVariables | undefined, tags: Tags | undefined): string;
export function substitute(text: string, variables?: IVariables, tags?: Tags): string | React.ReactNode {
let result: React.ReactNode | string = text;
if (variables !== undefined) {
const regexpMapping: IVariables = {};
for (const variable in variables) {
regexpMapping[`%\\(${variable}\\)s`] = variables[variable];
}
result = replaceByRegexes(result as string, regexpMapping);
}
if (tags !== undefined) {
const regexpMapping: Tags = {};
for (const tag in tags) {
regexpMapping[`(<${tag}>(.*?)<\\/${tag}>|<${tag}>|<${tag}\\s*\\/>)`] = tags[tag];
}
result = replaceByRegexes(result as string, regexpMapping);
}
return result;
}
/**
* Replace parts of a text using regular expressions
* @param text - The text on which to perform substitutions
* @param mapping - A mapping from regular expressions in string form to replacement string or a
* function which will receive as the argument the capture groups defined in the regexp. E.g.
* { 'Hello (.?) World': (sub) => sub.toUpperCase() }
*
* @return a React <span> component if any non-strings were used in substitutions, otherwise a string
*/
export function replaceByRegexes(text: string, mapping: IVariables): string;
export function replaceByRegexes(text: string, mapping: Tags): React.ReactNode;
export function replaceByRegexes(text: string, mapping: IVariables | Tags): string | React.ReactNode {
// We initially store our output as an array of strings and objects (e.g. React components).
// This will then be converted to a string or a <span> at the end
const output: SubstitutionValue[] = [text];
// If we insert any components we need to wrap the output in a span. React doesn't like just an array of components.
let shouldWrapInSpan = false;
for (const regexpString in mapping) {
// TODO: Cache regexps
const regexp = new RegExp(regexpString, "g");
// Loop over what output we have so far and perform replacements
// We look for matches: if we find one, we get three parts: everything before the match, the replaced part,
// and everything after the match. Insert all three into the output. We need to do this because we can insert objects.
// Otherwise there would be no need for the splitting and we could do simple replacement.
let matchFoundSomewhere = false; // If we don't find a match anywhere we want to log it
for (let outputIndex = 0; outputIndex < output.length; outputIndex++) {
const inputText = output[outputIndex];
if (typeof inputText !== "string") {
// We might have inserted objects earlier, don't try to replace them
continue;
}
// process every match in the string
// starting with the first
let match = regexp.exec(inputText);
if (!match) continue;
matchFoundSomewhere = true;
// The textual part before the first match
const head = inputText.slice(0, match.index);
const parts: SubstitutionValue[] = [];
// keep track of prevMatch
let prevMatch;
while (match) {
// store prevMatch
prevMatch = match;
const capturedGroups = match.slice(2);
let replaced: SubstitutionValue;
// If substitution is a function, call it
if (mapping[regexpString] instanceof Function) {
replaced = ((mapping as Tags)[regexpString] as Function)(...capturedGroups);
} else {
replaced = mapping[regexpString];
}
if (typeof replaced === "object") {
shouldWrapInSpan = true;
}
// Here we also need to check that it actually is a string before comparing against one
// The head and tail are always strings
if (typeof replaced !== "string" || replaced !== "") {
parts.push(replaced);
}
// try the next match
match = regexp.exec(inputText);
// add the text between prevMatch and this one
// or the end of the string if prevMatch is the last match
let tail;
if (match) {
const startIndex = prevMatch.index + prevMatch[0].length;
tail = inputText.slice(startIndex, match.index);
} else {
tail = inputText.slice(prevMatch.index + prevMatch[0].length);
}
if (tail) {
parts.push(tail);
}
}
// Insert in reverse order as splice does insert-before and this way we get the final order correct
// remove the old element at the same time
output.splice(outputIndex, 1, ...parts);
if (head !== "") {
// Don't push empty nodes, they are of no use
output.splice(outputIndex, 0, head);
}
}
if (!matchFoundSomewhere) {
if (
// The current regexp did not match anything in the input. Missing
// matches is entirely possible because you might choose to show some
// variables only in the case of e.g. plurals. It's still a bit
// suspicious, and could be due to an error, so log it. However, not
// showing count is so common that it's not worth logging. And other
// commonly unused variables here, if there are any.
regexpString !== "%\\(count\\)s" &&
// Ignore the `locale` option which can be used to override the locale
// in counterpart
regexpString !== "%\\(locale\\)s"
) {
logger.log(`Could not find ${regexp} in ${text}`);
}
}
}
if (shouldWrapInSpan) {
return React.createElement("span", null, ...output);
} else {
return output.join("");
}
}
// Allow overriding the text displayed when no translation exists
// Currently only used in unit tests to avoid having to load
// the translations in element-web
export function setMissingEntryGenerator(f: (value: string) => void): void {
counterpart.setMissingEntryGenerator(f);
}
type Languages = {
[lang: string]: string;
};
export function setLanguage(preferredLangs: string | string[]): Promise<void> {
if (!Array.isArray(preferredLangs)) {
preferredLangs = [preferredLangs];
}
const plaf = PlatformPeg.get();
if (plaf) {
plaf.setLanguage(preferredLangs);
}
let langToUse: string;
let availLangs: Languages;
return getLangsJson()
.then((result) => {
availLangs = result;
for (let i = 0; i < preferredLangs.length; ++i) {
if (availLangs.hasOwnProperty(preferredLangs[i])) {
langToUse = preferredLangs[i];
break;
}
}
if (!langToUse) {
// Fallback to en_EN if none is found
langToUse = "en";
logger.error("Unable to find an appropriate language");
}
return getLanguageRetry(i18nFolder + availLangs[langToUse]);
})
.then(async (langData): Promise<ICounterpartTranslation | undefined> => {
counterpart.registerTranslations(langToUse, langData);
await registerCustomTranslations();
counterpart.setLocale(langToUse);
await SettingsStore.setValue("language", null, SettingLevel.DEVICE, langToUse);
// Adds a lot of noise to test runs, so disable logging there.
if (process.env.NODE_ENV !== "test") {
logger.log("set language to " + langToUse);
}
// Set 'en' as fallback language:
if (langToUse !== "en") {
return getLanguageRetry(i18nFolder + availLangs["en"]);
}
})
.then(async (langData): Promise<void> => {
if (langData) counterpart.registerTranslations("en", langData);
await registerCustomTranslations();
});
}
type Language = {
value: string;
label: string; // translated
labelInTargetLanguage: string; // translated
};
export async function getAllLanguagesFromJson(): Promise<string[]> {
return Object.keys(await getLangsJson());
}
export async function getAllLanguagesWithLabels(): Promise<Language[]> {
const languageNames = new Intl.DisplayNames([getUserLanguage()], { type: "language", style: "short" });
const languages = await getAllLanguagesFromJson();
return languages.map<Language>((langKey) => {
return {
value: langKey,
label: languageNames.of(langKey)!,
labelInTargetLanguage: new Intl.DisplayNames([langKey], { type: "language", style: "short" }).of(langKey)!,
};
});
}
export function getLanguagesFromBrowser(): readonly string[] {
if (navigator.languages && navigator.languages.length) return navigator.languages;
if (navigator.language) return [navigator.language];
return [navigator.userLanguage || "en"];
}
export function getLanguageFromBrowser(): string {
return getLanguagesFromBrowser()[0];
}
export function getCurrentLanguage(): string {
return counterpart.getLocale();
}
/**
* Given a list of language codes, pick the most appropriate one
* given the current language (ie. getCurrentLanguage())
* English is assumed to be a reasonable default.
*
* @param {string[]} langs List of language codes to pick from
* @returns {string} The most appropriate language code from langs
*/
export function pickBestLanguage(langs: string[]): string {
const currentLang = getCurrentLanguage();
const normalisedLangs = langs.map(normalizeLanguageKey);
{
// Best is an exact match
const currentLangIndex = normalisedLangs.indexOf(currentLang);
if (currentLangIndex > -1) return langs[currentLangIndex];
}
{
// Failing that, a different dialect of the same language
const closeLangIndex = normalisedLangs.findIndex((l) => l.slice(0, 2) === currentLang.slice(0, 2));
if (closeLangIndex > -1) return langs[closeLangIndex];
}
{
// Neither of those? Try an english variant.
const enIndex = normalisedLangs.findIndex((l) => l.startsWith("en"));
if (enIndex > -1) return langs[enIndex];
}
// if nothing else, use the first
return langs[0];
}
async function getLangsJson(): Promise<Languages> {
let url: string;
if (typeof webpackLangJsonUrl === "string") {
// in Jest this 'url' isn't a URL, so just fall through
url = webpackLangJsonUrl;
} else {
url = i18nFolder + "languages.json";
}
const res = await fetch(url, { method: "GET" });
if (!res.ok) {
throw new Error(`Failed to load ${url}, got ${res.status}`);
}
return res.json();
}
interface ICounterpartTranslation {
[key: string]:
| string
| {
[pluralisation: string]: string;
};
}
async function getLanguageRetry(langPath: string, num = 3): Promise<ICounterpartTranslation> {
return retry(
() => getLanguage(langPath),
num,
(e) => {
logger.log("Failed to load i18n", langPath);
logger.error(e);
return true; // always retry
},
);
}
async function getLanguage(langPath: string): Promise<ICounterpartTranslation> {
const res = await fetch(langPath, { method: "GET" });
if (!res.ok) {
throw new Error(`Failed to load ${langPath}, got ${res.status}`);
}
return res.json();
}
let cachedCustomTranslations: Optional<TranslationStringsObject> = null;
let cachedCustomTranslationsExpire = 0; // zero to trigger expiration right away
// This awkward class exists so the test runner can get at the function. It is
// not intended for practical or realistic usage.
export class CustomTranslationOptions {
public static lookupFn?: (url: string) => TranslationStringsObject;
private constructor() {
// static access for tests only
}
}
function doRegisterTranslations(customTranslations: TranslationStringsObject): void {
// We convert the operator-friendly version into something counterpart can consume.
// Map: lang → Record: string → translation
const langs: MapWithDefault<string, Record<string, string>> = new MapWithDefault(() => ({}));
for (const [translationKey, translations] of Object.entries(customTranslations)) {
for (const [lang, translation] of Object.entries(translations)) {
_.set(langs.getOrCreate(lang), translationKey.split(KEY_SEPARATOR), translation);
}
}
// Finally, tell counterpart about our translations
for (const [lang, translations] of langs) {
counterpart.registerTranslations(lang, translations);
}
}
/**
* Any custom modules with translations to load are parsed first, followed by an
* optionally defined translations file in the config. If no customization is made,
* or the file can't be parsed, no action will be taken.
*
* This function should be called *after* registering other translations data to
* ensure it overrides strings properly.
*/
export async function registerCustomTranslations({
testOnlyIgnoreCustomTranslationsCache = false,
}: {
testOnlyIgnoreCustomTranslationsCache?: boolean;
} = {}): Promise<void> {
const moduleTranslations = ModuleRunner.instance.allTranslations;
doRegisterTranslations(moduleTranslations);
const lookupUrl = SdkConfig.get().custom_translations_url;
if (!lookupUrl) return; // easy - nothing to do
try {
let json: Optional<TranslationStringsObject>;
if (testOnlyIgnoreCustomTranslationsCache || Date.now() >= cachedCustomTranslationsExpire) {
json = CustomTranslationOptions.lookupFn
? CustomTranslationOptions.lookupFn(lookupUrl)
: ((await (await fetch(lookupUrl)).json()) as TranslationStringsObject);
cachedCustomTranslations = json;
// Set expiration to the future, but not too far. Just trying to avoid
// repeated, successive, calls to the server rather than anything long-term.
cachedCustomTranslationsExpire = Date.now() + 5 * 60 * 1000;
} else {
json = cachedCustomTranslations;
}
// If the (potentially cached) json is invalid, don't use it.
if (!json) return;
// Finally, register it.
doRegisterTranslations(json);
} catch (e) {
// We consume all exceptions because it's considered non-fatal for custom
// translations to break. Most failures will be during initial development
// of the json file and not (hopefully) at runtime.
logger.warn("Ignoring error while registering custom translations: ", e);
// Like above: trigger a cache of the json to avoid successive calls.
cachedCustomTranslationsExpire = Date.now() + 5 * 60 * 1000;
}
}