diff --git a/__mocks__/empty.js b/__mocks__/empty.js new file mode 100644 index 0000000000..51fb4fe937 --- /dev/null +++ b/__mocks__/empty.js @@ -0,0 +1,2 @@ +// Yes, this is empty. +module.exports = {}; diff --git a/package.json b/package.json index d9ea440936..5a32cf2c5a 100644 --- a/package.json +++ b/package.json @@ -186,7 +186,10 @@ ], "moduleNameMapper": { "\\.(gif|png|svg|ttf|woff2)$": "/__mocks__/imageMock.js", - "\\$webapp/i18n/languages.json": "/__mocks__/languages.json" + "\\$webapp/i18n/languages.json": "/__mocks__/languages.json", + "decoderWorker\\.min\\.js": "/__mocks__/empty.js", + "decoderWorker\\.min\\.wasm": "/__mocks__/empty.js", + "waveWorker\\.min\\.js": "/__mocks__/empty.js" }, "transformIgnorePatterns": [ "/node_modules/(?!matrix-js-sdk).+$" diff --git a/src/@types/global.d.ts b/src/@types/global.d.ts index e8f2f1bc08..f04a2ff237 100644 --- a/src/@types/global.d.ts +++ b/src/@types/global.d.ts @@ -52,6 +52,9 @@ declare global { init: () => Promise; }; + // Needed for Safari, unknown to TypeScript + webkitAudioContext: typeof AudioContext; + mxContentMessages: ContentMessages; mxToastStore: ToastStore; mxDeviceListener: DeviceListener; diff --git a/src/rageshake/rageshake.js b/src/rageshake/rageshake.js index b886f369df..9512f62e42 100644 --- a/src/rageshake/rageshake.js +++ b/src/rageshake/rageshake.js @@ -73,7 +73,9 @@ class ConsoleLogger { // Convert objects and errors to helpful things args = args.map((arg) => { - if (arg instanceof Error) { + if (arg instanceof DOMException) { + return arg.message + ` (${arg.name} | ${arg.code}) ` + (arg.stack ? `\n${arg.stack}` : ''); + } else if (arg instanceof Error) { return arg.message + (arg.stack ? `\n${arg.stack}` : ''); } else if (typeof (arg) === 'object') { try { diff --git a/src/voice/Playback.ts b/src/voice/Playback.ts index caa5241e1a..66667b9c0c 100644 --- a/src/voice/Playback.ts +++ b/src/voice/Playback.ts @@ -21,6 +21,7 @@ import {SimpleObservable} from "matrix-widget-api"; import {IDestroyable} from "../utils/IDestroyable"; import {PlaybackClock} from "./PlaybackClock"; import {clamp} from "../utils/numbers"; +import {createAudioContext, decodeOgg} from "./compat"; export enum PlaybackState { Decoding = "decoding", @@ -49,7 +50,7 @@ export class Playback extends EventEmitter implements IDestroyable { */ constructor(private buf: ArrayBuffer, seedWaveform = DEFAULT_WAVEFORM) { super(); - this.context = new AudioContext(); + this.context = createAudioContext(); this.resampledWaveform = arrayFastResample(seedWaveform ?? DEFAULT_WAVEFORM, PLAYBACK_WAVEFORM_SAMPLES); this.waveformObservable.update(this.resampledWaveform); this.clock = new PlaybackClock(this.context); @@ -91,7 +92,23 @@ export class Playback extends EventEmitter implements IDestroyable { } public async prepare() { - this.audioBuf = await this.context.decodeAudioData(this.buf); + // Safari compat: promise API not supported on this function + this.audioBuf = await new Promise((resolve, reject) => { + this.context.decodeAudioData(this.buf, b => resolve(b), async e => { + // This error handler is largely for Safari as well, which doesn't support Opus/Ogg + // very well. + console.error("Error decoding recording: ", e); + console.warn("Trying to re-encode to WAV instead..."); + + const wav = await decodeOgg(this.buf); + + // noinspection ES6MissingAwait - not needed when using callbacks + this.context.decodeAudioData(wav, b => resolve(b), e => { + console.error("Still failed to decode recording: ", e); + reject(e); + }); + }); + }); // Update the waveform to the real waveform once we have channel data to use. We don't // exactly trust the user-provided waveform to be accurate... diff --git a/src/voice/VoiceRecording.ts b/src/voice/VoiceRecording.ts index c4a0a78ce5..fde5779fa2 100644 --- a/src/voice/VoiceRecording.ts +++ b/src/voice/VoiceRecording.ts @@ -19,16 +19,17 @@ import encoderPath from 'opus-recorder/dist/encoderWorker.min.js'; import {MatrixClient} from "matrix-js-sdk/src/client"; import CallMediaHandler from "../CallMediaHandler"; import {SimpleObservable} from "matrix-widget-api"; -import {clamp} from "../utils/numbers"; +import {clamp, percentageOf, percentageWithin} from "../utils/numbers"; import EventEmitter from "events"; import {IDestroyable} from "../utils/IDestroyable"; import {Singleflight} from "../utils/Singleflight"; import {PayloadEvent, WORKLET_NAME} from "./consts"; import {UPDATE_EVENT} from "../stores/AsyncStore"; import {Playback} from "./Playback"; +import {createAudioContext} from "./compat"; const CHANNELS = 1; // stereo isn't important -const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality. +export const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality. const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus. const TARGET_MAX_LENGTH = 120; // 2 minutes in seconds. Somewhat arbitrary, though longer == larger files. const TARGET_WARN_TIME_LEFT = 10; // 10 seconds, also somewhat arbitrary. @@ -55,6 +56,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { private recorderStream: MediaStream; private recorderFFT: AnalyserNode; private recorderWorklet: AudioWorkletNode; + private recorderProcessor: ScriptProcessorNode; private buffer = new Uint8Array(0); // use this.audioBuffer to access private mxc: string; private recording = false; @@ -90,78 +92,107 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { } private async makeRecorder() { - this.recorderStream = await navigator.mediaDevices.getUserMedia({ - audio: { - channelCount: CHANNELS, - noiseSuppression: true, // browsers ignore constraints they can't honour - deviceId: CallMediaHandler.getAudioInput(), - }, - }); - this.recorderContext = new AudioContext({ - // latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing) - }); - this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream); - this.recorderFFT = this.recorderContext.createAnalyser(); + try { + this.recorderStream = await navigator.mediaDevices.getUserMedia({ + audio: { + channelCount: CHANNELS, + noiseSuppression: true, // browsers ignore constraints they can't honour + deviceId: CallMediaHandler.getAudioInput(), + }, + }); + this.recorderContext = createAudioContext({ + // latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing) + }); + this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream); + this.recorderFFT = this.recorderContext.createAnalyser(); - // Bring the FFT time domain down a bit. The default is 2048, and this must be a power - // of two. We use 64 points because we happen to know down the line we need less than - // that, but 32 would be too few. Large numbers are not helpful here and do not add - // precision: they introduce higher precision outputs of the FFT (frequency data), but - // it makes the time domain less than helpful. - this.recorderFFT.fftSize = 64; + // Bring the FFT time domain down a bit. The default is 2048, and this must be a power + // of two. We use 64 points because we happen to know down the line we need less than + // that, but 32 would be too few. Large numbers are not helpful here and do not add + // precision: they introduce higher precision outputs of the FFT (frequency data), but + // it makes the time domain less than helpful. + this.recorderFFT.fftSize = 64; - // Set up our worklet. We use this for timing information and waveform analysis: the - // web audio API prefers this be done async to avoid holding the main thread with math. - const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript; - if (!mxRecorderWorkletPath) { - throw new Error("Unable to create recorder: no worklet script registered"); - } - await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath); - this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME); - - // Connect our inputs and outputs - this.recorderSource.connect(this.recorderFFT); - this.recorderSource.connect(this.recorderWorklet); - this.recorderWorklet.connect(this.recorderContext.destination); - - // Dev note: we can't use `addEventListener` for some reason. It just doesn't work. - this.recorderWorklet.port.onmessage = (ev) => { - switch (ev.data['ev']) { - case PayloadEvent.Timekeep: - this.processAudioUpdate(ev.data['timeSeconds']); - break; - case PayloadEvent.AmplitudeMark: - // Sanity check to make sure we're adding about one sample per second - if (ev.data['forSecond'] === this.amplitudes.length) { - this.amplitudes.push(ev.data['amplitude']); - } - break; + // Set up our worklet. We use this for timing information and waveform analysis: the + // web audio API prefers this be done async to avoid holding the main thread with math. + const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript; + if (!mxRecorderWorkletPath) { + // noinspection ExceptionCaughtLocallyJS + throw new Error("Unable to create recorder: no worklet script registered"); } - }; - this.recorder = new Recorder({ - encoderPath, // magic from webpack - encoderSampleRate: SAMPLE_RATE, - encoderApplication: 2048, // voice (default is "audio") - streamPages: true, // this speeds up the encoding process by using CPU over time - encoderFrameSize: 20, // ms, arbitrary frame size we send to the encoder - numberOfChannels: CHANNELS, - sourceNode: this.recorderSource, - encoderBitRate: BITRATE, + // Connect our inputs and outputs + this.recorderSource.connect(this.recorderFFT); - // We use low values for the following to ease CPU usage - the resulting waveform - // is indistinguishable for a voice message. Note that the underlying library will - // pick defaults which prefer the highest possible quality, CPU be damned. - encoderComplexity: 3, // 0-10, 10 is slow and high quality. - resampleQuality: 3, // 0-10, 10 is slow and high quality - }); - this.recorder.ondataavailable = (a: ArrayBuffer) => { - const buf = new Uint8Array(a); - const newBuf = new Uint8Array(this.buffer.length + buf.length); - newBuf.set(this.buffer, 0); - newBuf.set(buf, this.buffer.length); - this.buffer = newBuf; - }; + if (this.recorderContext.audioWorklet) { + await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath); + this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME); + this.recorderSource.connect(this.recorderWorklet); + this.recorderWorklet.connect(this.recorderContext.destination); + + // Dev note: we can't use `addEventListener` for some reason. It just doesn't work. + this.recorderWorklet.port.onmessage = (ev) => { + switch (ev.data['ev']) { + case PayloadEvent.Timekeep: + this.processAudioUpdate(ev.data['timeSeconds']); + break; + case PayloadEvent.AmplitudeMark: + // Sanity check to make sure we're adding about one sample per second + if (ev.data['forSecond'] === this.amplitudes.length) { + this.amplitudes.push(ev.data['amplitude']); + } + break; + } + }; + } else { + // Safari fallback: use a processor node instead, buffered to 1024 bytes of data + // like the worklet is. + this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS); + this.recorderSource.connect(this.recorderProcessor); + this.recorderProcessor.connect(this.recorderContext.destination); + this.recorderProcessor.addEventListener("audioprocess", this.onAudioProcess); + } + + this.recorder = new Recorder({ + encoderPath, // magic from webpack + encoderSampleRate: SAMPLE_RATE, + encoderApplication: 2048, // voice (default is "audio") + streamPages: true, // this speeds up the encoding process by using CPU over time + encoderFrameSize: 20, // ms, arbitrary frame size we send to the encoder + numberOfChannels: CHANNELS, + sourceNode: this.recorderSource, + encoderBitRate: BITRATE, + + // We use low values for the following to ease CPU usage - the resulting waveform + // is indistinguishable for a voice message. Note that the underlying library will + // pick defaults which prefer the highest possible quality, CPU be damned. + encoderComplexity: 3, // 0-10, 10 is slow and high quality. + resampleQuality: 3, // 0-10, 10 is slow and high quality + }); + this.recorder.ondataavailable = (a: ArrayBuffer) => { + const buf = new Uint8Array(a); + const newBuf = new Uint8Array(this.buffer.length + buf.length); + newBuf.set(this.buffer, 0); + newBuf.set(buf, this.buffer.length); + this.buffer = newBuf; + }; + } catch (e) { + console.error("Error starting recording: ", e); + if (e instanceof DOMException) { // Unhelpful DOMExceptions are common - parse them sanely + console.error(`${e.name} (${e.code}): ${e.message}`); + } + + // Clean up as best as possible + if (this.recorderStream) this.recorderStream.getTracks().forEach(t => t.stop()); + if (this.recorderSource) this.recorderSource.disconnect(); + if (this.recorder) this.recorder.close(); + if (this.recorderContext) { + // noinspection ES6MissingAwait - not important that we wait + this.recorderContext.close(); + } + + throw e; // rethrow so upstream can handle it + } } private get audioBuffer(): Uint8Array { @@ -190,6 +221,13 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { return this.mxc; } + private onAudioProcess = (ev: AudioProcessingEvent) => { + this.processAudioUpdate(ev.playbackTime); + + // We skip the functionality of the worklet regarding waveform calculations: we + // should get that information pretty quick during the playback info. + }; + private processAudioUpdate = (timeSeconds: number) => { if (!this.recording) return; @@ -197,7 +235,16 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { // size. The time domain is also known as the audio waveform. We're ignoring the // output of the FFT here (frequency data) because we're not interested in it. const data = new Float32Array(this.recorderFFT.fftSize); - this.recorderFFT.getFloatTimeDomainData(data); + if (!this.recorderFFT.getFloatTimeDomainData) { + // Safari compat + const data2 = new Uint8Array(this.recorderFFT.fftSize); + this.recorderFFT.getByteTimeDomainData(data2); + for (let i = 0; i < data2.length; i++) { + data[i] = percentageWithin(percentageOf(data2[i], 0, 256), -1, 1); + } + } else { + this.recorderFFT.getFloatTimeDomainData(data); + } // We can't just `Array.from()` the array because we're dealing with 32bit floats // and the built-in function won't consider that when converting between numbers. @@ -268,7 +315,11 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { // Disconnect the source early to start shutting down resources await this.recorder.stop(); // stop first to flush the last frame this.recorderSource.disconnect(); - this.recorderWorklet.disconnect(); + if (this.recorderWorklet) this.recorderWorklet.disconnect(); + if (this.recorderProcessor) { + this.recorderProcessor.disconnect(); + this.recorderProcessor.removeEventListener("audioprocess", this.onAudioProcess); + } // close the context after the recorder so the recorder doesn't try to // connect anything to the context (this would generate a warning) diff --git a/src/voice/compat.ts b/src/voice/compat.ts new file mode 100644 index 0000000000..316d779e28 --- /dev/null +++ b/src/voice/compat.ts @@ -0,0 +1,82 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import {SAMPLE_RATE} from "./VoiceRecording"; + +// @ts-ignore - we know that this is not a module. We're looking for a path. +import decoderWasmPath from 'opus-recorder/dist/decoderWorker.min.wasm'; +import wavEncoderPath from 'opus-recorder/dist/waveWorker.min.js'; +import decoderPath from 'opus-recorder/dist/decoderWorker.min.js'; + +export function createAudioContext(opts?: AudioContextOptions): AudioContext { + if (window.AudioContext) { + return new AudioContext(opts); + } else if (window.webkitAudioContext) { + // While the linter is correct that "a constructor name should not start with + // a lowercase letter", it's also wrong to think that we have control over this. + // eslint-disable-next-line new-cap + return new window.webkitAudioContext(opts); + } else { + throw new Error("Unsupported browser"); + } +} + +export function decodeOgg(audioBuffer: ArrayBuffer): Promise { + // Condensed version of decoder example, using a promise: + // https://github.com/chris-rudmin/opus-recorder/blob/master/example/decoder.html + return new Promise((resolve) => { // no reject because the workers don't seem to have a fail path + console.log("Decoder WASM path: " + decoderWasmPath); // so we use the variable (avoid tree shake) + const typedArray = new Uint8Array(audioBuffer); + const decoderWorker = new Worker(decoderPath); + const wavWorker = new Worker(wavEncoderPath); + + decoderWorker.postMessage({ + command: 'init', + decoderSampleRate: SAMPLE_RATE, + outputBufferSampleRate: SAMPLE_RATE, + }); + + wavWorker.postMessage({ + command: 'init', + wavBitDepth: 24, // standard for 48khz (SAMPLE_RATE) + wavSampleRate: SAMPLE_RATE, + }); + + decoderWorker.onmessage = (ev) => { + if (ev.data === null) { // null == done + wavWorker.postMessage({command: 'done'}); + return; + } + + wavWorker.postMessage({ + command: 'encode', + buffers: ev.data, + }, ev.data.map(b => b.buffer)); + }; + + wavWorker.onmessage = (ev) => { + if (ev.data.message === 'page') { + // The encoding comes through as a single page + resolve(new Blob([ev.data.page], {type: "audio/wav"}).arrayBuffer()); + } + }; + + decoderWorker.postMessage({ + command: 'decode', + pages: typedArray, + }, [typedArray.buffer]); + }); +}