From c53ed5be2e1abaffb0e553bcddc035eb1bc8f3cc Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@matrix.org>
Date: Wed, 21 Jul 2021 17:53:33 -0600
Subject: [PATCH] Use a MediaElementAudioSourceNode to process large audio
 files

Fixes https://github.com/vector-im/element-web/issues/18149

See comment block contained within diff.
---
 src/voice/Playback.ts | 86 +++++++++++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 23 deletions(-)

diff --git a/src/voice/Playback.ts b/src/voice/Playback.ts
index df0bf593fa..b72963e90e 100644
--- a/src/voice/Playback.ts
+++ b/src/voice/Playback.ts
@@ -59,9 +59,10 @@ export class Playback extends EventEmitter implements IDestroyable {
     public readonly thumbnailWaveform: number[];
 
     private readonly context: AudioContext;
-    private source: AudioBufferSourceNode;
+    private source: AudioBufferSourceNode | MediaElementAudioSourceNode;
     private state = PlaybackState.Decoding;
     private audioBuf: AudioBuffer;
+    private element: HTMLAudioElement;
     private resampledWaveform: number[];
     private waveformObservable = new SimpleObservable<number[]>();
     private readonly clock: PlaybackClock;
@@ -129,36 +130,59 @@ export class Playback extends EventEmitter implements IDestroyable {
         this.removeAllListeners();
         this.clock.destroy();
         this.waveformObservable.close();
+        if (this.element) {
+            URL.revokeObjectURL(this.element.src);
+            this.element.remove();
+        }
     }
 
     public async prepare() {
-        // Safari compat: promise API not supported on this function
-        this.audioBuf = await new Promise((resolve, reject) => {
-            this.context.decodeAudioData(this.buf, b => resolve(b), async e => {
-                // This error handler is largely for Safari as well, which doesn't support Opus/Ogg
-                // very well.
-                console.error("Error decoding recording: ", e);
-                console.warn("Trying to re-encode to WAV instead...");
+        // The point where we use an audio element is fairly arbitrary, though we don't want
+        // it to be too low. As of writing, voice messages want to show a waveform but audio
+        // messages do not. Using an audio element means we can't show a waveform preview, so
+        // we try to target the difference between a voice message file and large audio file.
+        // Overall, the point of this is to avoid memory-related issues due to storing a massive
+        // audio buffer in memory, as that can balloon to far greater than the input buffer's
+        // byte length.
+        if (this.buf.byteLength > 5 * 1024 * 1024) { // 5mb
+            console.log("Audio file too large: processing through <audio /> element");
+            this.element = document.createElement("AUDIO") as HTMLAudioElement;
+            const prom = new Promise((resolve, reject) => {
+                this.element.onloadeddata = () => resolve(null);
+                this.element.onerror = (e) => reject(e);
+            });
+            this.element.src = URL.createObjectURL(new Blob([this.buf]));
+            await prom; // make sure the audio element is ready for us
+        } else {
+            // Safari compat: promise API not supported on this function
+            this.audioBuf = await new Promise((resolve, reject) => {
+                this.context.decodeAudioData(this.buf, b => resolve(b), async e => {
+                    // This error handler is largely for Safari as well, which doesn't support Opus/Ogg
+                    // very well.
+                    console.error("Error decoding recording: ", e);
+                    console.warn("Trying to re-encode to WAV instead...");
 
-            const wav = await decodeOgg(this.buf);
+                    const wav = await decodeOgg(this.buf);
 
-            // noinspection ES6MissingAwait - not needed when using callbacks
-            this.context.decodeAudioData(wav, b => resolve(b), e => {
-                console.error("Still failed to decode recording: ", e);
-                reject(e);
+                    // noinspection ES6MissingAwait - not needed when using callbacks
+                    this.context.decodeAudioData(wav, b => resolve(b), e => {
+                        console.error("Still failed to decode recording: ", e);
+                        reject(e);
+                    });
                 });
             });
-        });
 
-        // Update the waveform to the real waveform once we have channel data to use. We don't
-        // exactly trust the user-provided waveform to be accurate...
-        const waveform = Array.from(this.audioBuf.getChannelData(0));
-        this.resampledWaveform = makePlaybackWaveform(waveform);
+            // Update the waveform to the real waveform once we have channel data to use. We don't
+            // exactly trust the user-provided waveform to be accurate...
+            const waveform = Array.from(this.audioBuf.getChannelData(0));
+            this.resampledWaveform = makePlaybackWaveform(waveform);
+        }
+
         this.waveformObservable.update(this.resampledWaveform);
 
         this.emit(PlaybackState.Stopped); // signal that we're not decoding anymore
         this.clock.flagLoadTime(); // must happen first because setting the duration fires a clock update
-        this.clock.durationSeconds = this.audioBuf.duration;
+        this.clock.durationSeconds = this.element ? this.element.duration : this.audioBuf.duration;
     }
 
     private onPlaybackEnd = async () => {
@@ -171,7 +195,11 @@ export class Playback extends EventEmitter implements IDestroyable {
         if (this.state === PlaybackState.Stopped) {
             this.disconnectSource();
             this.makeNewSourceBuffer();
-            this.source.start();
+            if (this.element) {
+                await this.element.play();
+            } else {
+                (this.source as AudioBufferSourceNode).start();
+            }
         }
 
         // We use the context suspend/resume functions because it allows us to pause a source
@@ -182,13 +210,21 @@ export class Playback extends EventEmitter implements IDestroyable {
     }
 
     private disconnectSource() {
+        if (this.element) return; // leave connected, we can (and must) re-use it
         this.source?.disconnect();
         this.source?.removeEventListener("ended", this.onPlaybackEnd);
     }
 
     private makeNewSourceBuffer() {
+        if (this.element && this.source) return; // leave connected, we can (and must) re-use it
+
+        if (this.element) {
+            this.source = this.context.createMediaElementSource(this.element);
+        } else {
+            this.source = this.context.createBufferSource();
+            this.source.buffer = this.audioBuf;
+        }
+
         this.source.addEventListener("ended", this.onPlaybackEnd);
         this.source.connect(this.context.destination);
     }
@@ -241,7 +277,11 @@ export class Playback extends EventEmitter implements IDestroyable {
         // when it comes time to the user hitting play. After a couple jumps, the user
         // will have desynced the clock enough to be about 10-15 seconds off, while this
         // keeps it as close to perfect as humans can perceive.
-        this.source.start(now, timeSeconds);
+        if (this.element) {
+            this.element.currentTime = timeSeconds;
+        } else {
+            (this.source as AudioBufferSourceNode).start(now, timeSeconds);
+        }
 
         // Dev note: it's critical that the code gap between `this.source.start()` and
         // `this.pause()` is as small as possible: we do not want to delay *anything*
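
A worked example of the memory concern described in prepare()'s comment block:
decodeAudioData() inflates compressed audio into raw 32-bit float PCM for every
channel, so the resulting AudioBuffer is routinely an order of magnitude larger
than the encoded input. The bitrate and duration figures below are illustrative
assumptions, not taken from the patch:

    // Rough size of a decoded AudioBuffer: uncompressed float32 PCM per channel.
    function decodedBytes(sampleRate: number, channels: number, seconds: number): number {
        return sampleRate * channels * seconds * 4; // 4 bytes per float32 sample
    }

    // Assumption: a ~5MB Opus file at ~64kbps holds roughly ten minutes of audio.
    // Decoded at 48kHz stereo that becomes:
    decodedBytes(48000, 2, 600); // 230,400,000 bytes, ~220MB from a ~5MB input

This is the "balloon to far greater than the input buffer's byte length"
behaviour that the 5 * 1024 * 1024 threshold guards against.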
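For files over that threshold, the technique the patch switches to is handing
the encoded bytes to an <audio> element and wiring that element into the
existing AudioContext graph. A minimal self-contained sketch of the approach,
assuming a raw ArrayBuffer named buf (identifiers here are illustrative, not
the element-web implementation):

    const context = new AudioContext();

    async function playLargeAudio(buf: ArrayBuffer): Promise<HTMLAudioElement> {
        // Let the browser stream and decode the file incrementally instead of
        // holding a fully decoded AudioBuffer in memory.
        const element = document.createElement("audio");
        element.src = URL.createObjectURL(new Blob([buf]));
        await new Promise((resolve, reject) => {
            element.onloadeddata = resolve;
            element.onerror = reject;
        });

        // Route the element through the audio graph and start playback.
        const source = context.createMediaElementSource(element);
        source.connect(context.destination);
        await element.play();
        return element; // seek later via element.currentTime
    }

Note that createMediaElementSource() throws an InvalidStateError if it is
called a second time for the same element, which is why disconnectSource() and
makeNewSourceBuffer() above early-return and re-use the cached node whenever
this.element is set.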
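On the pause model referenced in play(): an AudioBufferSourceNode is one-shot,
so start() may only be called at most once, and the class instead pauses by
suspending the whole AudioContext. A small sketch of that approach (the
togglePause helper is an assumption for illustration, not part of the patch):

    // Suspending the context halts rendering without destroying the source
    // node; resuming picks up where playback left off.
    async function togglePause(context: AudioContext): Promise<void> {
        if (context.state === "running") {
            await context.suspend();
        } else if (context.state === "suspended") {
            await context.resume();
        }
    }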