From 6f794cca9b173ddf6d3a3b71088ff15dd93d53c5 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 15 Apr 2021 13:05:27 -0600 Subject: [PATCH 1/6] Fill in some metadata for the sent event --- .../views/rooms/VoiceRecordComposerTile.tsx | 23 +++++++++++++++++++ src/voice/VoiceRecording.ts | 15 +++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/components/views/rooms/VoiceRecordComposerTile.tsx b/src/components/views/rooms/VoiceRecordComposerTile.tsx index 1210a44958..f46b7c6311 100644 --- a/src/components/views/rooms/VoiceRecordComposerTile.tsx +++ b/src/components/views/rooms/VoiceRecordComposerTile.tsx @@ -55,7 +55,30 @@ export default class VoiceRecordComposerTile extends React.PureComponent r['content_uri']); From 7d9562137ef2768c34214c122ba99b77cf5a28f4 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 19 Apr 2021 21:54:08 -0600 Subject: [PATCH 2/6] Replace deprecated processor with a worklet --- src/@types/global.d.ts | 27 ++++++++++++++++++++++++ src/voice/RecorderWorklet.ts | 37 +++++++++++++++++++++++++++++++++ src/voice/VoiceRecording.ts | 40 ++++++++++++++++++++++-------------- src/voice/consts.ts | 29 ++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 15 deletions(-) create mode 100644 src/voice/RecorderWorklet.ts create mode 100644 src/voice/consts.ts diff --git a/src/@types/global.d.ts b/src/@types/global.d.ts index ee0963e537..78dad28566 100644 --- a/src/@types/global.d.ts +++ b/src/@types/global.d.ts @@ -129,4 +129,31 @@ declare global { // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Error/columnNumber columnNumber?: number; } + + // https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278 + interface AudioWorkletProcessor { + readonly port: MessagePort; + process( + inputs: Float32Array[][], + outputs: Float32Array[][], + parameters: Record + ): boolean; + + } + + // https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278 + const AudioWorkletProcessor: { + prototype: AudioWorkletProcessor; + new (options?: AudioWorkletNodeOptions): AudioWorkletProcessor; + }; + + // https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278 + function registerProcessor( + name: string, + processorCtor: (new ( + options?: AudioWorkletNodeOptions + ) => AudioWorkletProcessor) & { + parameterDescriptors?: AudioParamDescriptor[]; + } + ); } diff --git a/src/voice/RecorderWorklet.ts b/src/voice/RecorderWorklet.ts new file mode 100644 index 0000000000..11f24fce4c --- /dev/null +++ b/src/voice/RecorderWorklet.ts @@ -0,0 +1,37 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import {ITimingPayload, PayloadEvent, WORKLET_NAME} from "./consts"; + +// from AudioWorkletGlobalScope: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletGlobalScope +declare const currentTime: number; +declare const currentFrame: number; +declare const sampleRate: number; + +class MxVoiceWorklet extends AudioWorkletProcessor { + constructor() { + super(); + } + + process(inputs, outputs, parameters) { + this.port.postMessage({ev: PayloadEvent.Timekeep, timeSeconds: currentTime}); + return true; + } +} + +registerProcessor(WORKLET_NAME, MxVoiceWorklet); + +export default null; // to appease module loaders (we never use the export) diff --git a/src/voice/VoiceRecording.ts b/src/voice/VoiceRecording.ts index fc52a38fa9..8e506c235c 100644 --- a/src/voice/VoiceRecording.ts +++ b/src/voice/VoiceRecording.ts @@ -23,6 +23,7 @@ import {clamp} from "../utils/numbers"; import EventEmitter from "events"; import {IDestroyable} from "../utils/IDestroyable"; import {Singleflight} from "../utils/Singleflight"; +import {PayloadEvent, WORKLET_NAME} from "./consts"; const CHANNELS = 1; // stereo isn't important const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality. @@ -49,7 +50,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { private recorderSource: MediaStreamAudioSourceNode; private recorderStream: MediaStream; private recorderFFT: AnalyserNode; - private recorderProcessor: ScriptProcessorNode; + private recorderWorklet: AudioWorkletNode; private buffer = new Uint8Array(0); private mxc: string; private recording = false; @@ -93,18 +94,28 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { // it makes the time domain less than helpful. this.recorderFFT.fftSize = 64; - // We use an audio processor to get accurate timing information. - // The size of the audio buffer largely decides how quickly we push timing/waveform data - // out of this class. Smaller buffers mean we update more frequently as we can't hold as - // many bytes. Larger buffers mean slower updates. For scale, 1024 gives us about 30Hz of - // updates and 2048 gives us about 20Hz. We use 1024 to get as close to perceived realtime - // as possible. Must be a power of 2. - this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS); + // Set up our worklet. We use this for timing information and waveform analysis: the + // web audio API prefers this be done async to avoid holding the main thread with math. + const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript; + if (!mxRecorderWorkletPath) { + throw new Error("Unable to create recorder: no worklet script registered"); + } + await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath); + this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME); // Connect our inputs and outputs this.recorderSource.connect(this.recorderFFT); - this.recorderSource.connect(this.recorderProcessor); - this.recorderProcessor.connect(this.recorderContext.destination); + this.recorderSource.connect(this.recorderWorklet); + this.recorderWorklet.connect(this.recorderContext.destination); + + // Dev note: we can't use `addEventListener` for some reason. It just doesn't work. + this.recorderWorklet.port.onmessage = (ev) => { + switch(ev.data['ev']) { + case PayloadEvent.Timekeep: + this.processAudioUpdate(ev.data['timeSeconds']); + break; + } + }; this.recorder = new Recorder({ encoderPath, // magic from webpack @@ -151,7 +162,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { return this.mxc; } - private processAudioUpdate = (ev: AudioProcessingEvent) => { + private processAudioUpdate = (timeSeconds: number) => { if (!this.recording) return; // The time domain is the input to the FFT, which means we use an array of the same @@ -175,12 +186,12 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { this.observable.update({ waveform: translatedData, - timeSeconds: ev.playbackTime, + timeSeconds: timeSeconds, }); // Now that we've updated the data/waveform, let's do a time check. We don't want to // go horribly over the limit. We also emit a warning state if needed. - const secondsLeft = TARGET_MAX_LENGTH - ev.playbackTime; + const secondsLeft = TARGET_MAX_LENGTH - timeSeconds; if (secondsLeft <= 0) { // noinspection JSIgnoredPromiseFromCall - we aren't concerned with it overlapping this.stop(); @@ -204,7 +215,6 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { } this.observable = new SimpleObservable(); await this.makeRecorder(); - this.recorderProcessor.addEventListener("audioprocess", this.processAudioUpdate); await this.recorder.start(); this.recording = true; this.emit(RecordingState.Started); @@ -218,6 +228,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { // Disconnect the source early to start shutting down resources this.recorderSource.disconnect(); + this.recorderWorklet.disconnect(); await this.recorder.stop(); // close the context after the recorder so the recorder doesn't try to @@ -229,7 +240,6 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { // Finally do our post-processing and clean up this.recording = false; - this.recorderProcessor.removeEventListener("audioprocess", this.processAudioUpdate); await this.recorder.close(); this.emit(RecordingState.Ended); diff --git a/src/voice/consts.ts b/src/voice/consts.ts new file mode 100644 index 0000000000..dbd3b574f4 --- /dev/null +++ b/src/voice/consts.ts @@ -0,0 +1,29 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +export const WORKLET_NAME = "mx-voice-worklet"; + +export enum PayloadEvent { + Timekeep = "timekeep", +} + +export interface IPayload { + ev: PayloadEvent; +} + +export interface ITimingPayload extends IPayload { + timeSeconds: number; +} From 61730f2f881292bfcdf5becd249c0fc4c45edb1e Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 19 Apr 2021 23:05:06 -0600 Subject: [PATCH 3/6] Populate waveform data on voice message event --- .../views/rooms/VoiceRecordComposerTile.tsx | 8 ++++- src/utils/arrays.ts | 2 +- src/voice/RecorderWorklet.ts | 36 ++++++++++++++++++- src/voice/VoiceRecording.ts | 12 +++++++ src/voice/consts.ts | 8 +++++ 5 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/components/views/rooms/VoiceRecordComposerTile.tsx b/src/components/views/rooms/VoiceRecordComposerTile.tsx index f46b7c6311..05beb3a0ca 100644 --- a/src/components/views/rooms/VoiceRecordComposerTile.tsx +++ b/src/components/views/rooms/VoiceRecordComposerTile.tsx @@ -77,7 +77,13 @@ export default class VoiceRecordComposerTile extends React.PureComponent Math.round(v * 1024)), }, }); await VoiceRecordingStore.instance.disposeRecording(); diff --git a/src/utils/arrays.ts b/src/utils/arrays.ts index 52308937f7..8ab66dfb29 100644 --- a/src/utils/arrays.ts +++ b/src/utils/arrays.ts @@ -54,7 +54,7 @@ export function arraySeed(val: T, length: number): T[] { * @param a The array to clone. Must be defined. * @returns A copy of the array. */ -export function arrayFastClone(a: any[]): any[] { +export function arrayFastClone(a: T[]): T[] { return a.slice(0, a.length); } diff --git a/src/voice/RecorderWorklet.ts b/src/voice/RecorderWorklet.ts index 11f24fce4c..8d6f1e9627 100644 --- a/src/voice/RecorderWorklet.ts +++ b/src/voice/RecorderWorklet.ts @@ -14,7 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. */ -import {ITimingPayload, PayloadEvent, WORKLET_NAME} from "./consts"; +import {IAmplitudePayload, ITimingPayload, PayloadEvent, WORKLET_NAME} from "./consts"; +import {percentageOf} from "../utils/numbers"; // from AudioWorkletGlobalScope: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletGlobalScope declare const currentTime: number; @@ -22,12 +23,45 @@ declare const currentFrame: number; declare const sampleRate: number; class MxVoiceWorklet extends AudioWorkletProcessor { + private nextAmplitudeSecond = 0; + constructor() { super(); } process(inputs, outputs, parameters) { + // We only fire amplitude updates once a second to avoid flooding the recording instance + // with useless data. Much of the data would end up discarded, so we ratelimit ourselves + // here. + const currentSecond = Math.round(currentTime); + if (currentSecond === this.nextAmplitudeSecond) { + // We're expecting exactly one mono input source, so just grab the very first frame of + // samples for the analysis. + const monoChan = inputs[0][0]; + + // The amplitude of the frame's samples is effectively the loudness of the frame. This + // translates into a bar which can be rendered as part of the whole recording clip's + // waveform. + // + // We translate the amplitude down to 0-1 for sanity's sake. + const minVal = monoChan.reduce((m, v) => Math.min(m, v), Number.MAX_SAFE_INTEGER); + const maxVal = monoChan.reduce((m, v) => Math.max(m, v), Number.MIN_SAFE_INTEGER); + const amplitude = percentageOf(maxVal, -1, 1) - percentageOf(minVal, -1, 1); + + this.port.postMessage({ + ev: PayloadEvent.AmplitudeMark, + amplitude: amplitude, + forSecond: currentSecond, + }); + this.nextAmplitudeSecond++; + } + + // We mostly use this worklet to fire regular clock updates through to components this.port.postMessage({ev: PayloadEvent.Timekeep, timeSeconds: currentTime}); + + // We're supposed to return false when we're "done" with the audio clip, but seeing as + // we are acting as a passive processor we are never truly "done". The browser will clean + // us up when it is done with us. return true; } } diff --git a/src/voice/VoiceRecording.ts b/src/voice/VoiceRecording.ts index 8e506c235c..716936f636 100644 --- a/src/voice/VoiceRecording.ts +++ b/src/voice/VoiceRecording.ts @@ -24,6 +24,7 @@ import EventEmitter from "events"; import {IDestroyable} from "../utils/IDestroyable"; import {Singleflight} from "../utils/Singleflight"; import {PayloadEvent, WORKLET_NAME} from "./consts"; +import {arrayFastClone} from "../utils/arrays"; const CHANNELS = 1; // stereo isn't important const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality. @@ -55,11 +56,16 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { private mxc: string; private recording = false; private observable: SimpleObservable; + private amplitudes: number[] = []; // at each second mark, generated public constructor(private client: MatrixClient) { super(); } + public get finalWaveform(): number[] { + return arrayFastClone(this.amplitudes); + } + public get contentType(): string { return "audio/ogg"; } @@ -114,6 +120,12 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { case PayloadEvent.Timekeep: this.processAudioUpdate(ev.data['timeSeconds']); break; + case PayloadEvent.AmplitudeMark: + // Sanity check to make sure we're adding about one sample per second + if (ev.data['forSecond'] === this.amplitudes.length) { + this.amplitudes.push(ev.data['amplitude']); + } + break; } }; diff --git a/src/voice/consts.ts b/src/voice/consts.ts index dbd3b574f4..c530c60f0b 100644 --- a/src/voice/consts.ts +++ b/src/voice/consts.ts @@ -18,6 +18,7 @@ export const WORKLET_NAME = "mx-voice-worklet"; export enum PayloadEvent { Timekeep = "timekeep", + AmplitudeMark = "amplitude_mark", } export interface IPayload { @@ -25,5 +26,12 @@ export interface IPayload { } export interface ITimingPayload extends IPayload { + ev: PayloadEvent.Timekeep; timeSeconds: number; } + +export interface IAmplitudePayload extends IPayload { + ev: PayloadEvent.AmplitudeMark; + forSecond: number; + amplitude: number; +} From 4f75e2944cd8fd399c9de461c68505961b2cb7a7 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 19 Apr 2021 23:11:41 -0600 Subject: [PATCH 4/6] Appease the linter --- src/components/views/rooms/VoiceRecordComposerTile.tsx | 10 +++++----- src/voice/RecorderWorklet.ts | 4 ++-- src/voice/VoiceRecording.ts | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/components/views/rooms/VoiceRecordComposerTile.tsx b/src/components/views/rooms/VoiceRecordComposerTile.tsx index 05beb3a0ca..9b7f0da472 100644 --- a/src/components/views/rooms/VoiceRecordComposerTile.tsx +++ b/src/components/views/rooms/VoiceRecordComposerTile.tsx @@ -53,11 +53,11 @@ export default class VoiceRecordComposerTile extends React.PureComponent { - switch(ev.data['ev']) { + switch (ev.data['ev']) { case PayloadEvent.Timekeep: this.processAudioUpdate(ev.data['timeSeconds']); break; From b519d851277c2d87ee8b3cd278f89a21927f47b6 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 20 Apr 2021 09:32:12 -0600 Subject: [PATCH 5/6] Update src/voice/RecorderWorklet.ts to use sanity Co-authored-by: Germain --- src/voice/RecorderWorklet.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/voice/RecorderWorklet.ts b/src/voice/RecorderWorklet.ts index eab6bc5f21..48387fc06e 100644 --- a/src/voice/RecorderWorklet.ts +++ b/src/voice/RecorderWorklet.ts @@ -44,8 +44,8 @@ class MxVoiceWorklet extends AudioWorkletProcessor { // waveform. // // We translate the amplitude down to 0-1 for sanity's sake. - const minVal = monoChan.reduce((m, v) => Math.min(m, v), Number.MAX_SAFE_INTEGER); - const maxVal = monoChan.reduce((m, v) => Math.max(m, v), Number.MIN_SAFE_INTEGER); + const minVal = Math.min(...monoChan); + const maxVal = Math.max(...monoChan); const amplitude = percentageOf(maxVal, -1, 1) - percentageOf(minVal, -1, 1); this.port.postMessage({ From 14809dfda7f5b1e58539e239028124b6da4e6f79 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 22 Apr 2021 08:22:31 -0600 Subject: [PATCH 6/6] Misc cleanup --- src/@types/global.d.ts | 1 - src/voice/RecorderWorklet.ts | 4 ---- 2 files changed, 5 deletions(-) diff --git a/src/@types/global.d.ts b/src/@types/global.d.ts index 78dad28566..41257c21f0 100644 --- a/src/@types/global.d.ts +++ b/src/@types/global.d.ts @@ -138,7 +138,6 @@ declare global { outputs: Float32Array[][], parameters: Record ): boolean; - } // https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278 diff --git a/src/voice/RecorderWorklet.ts b/src/voice/RecorderWorklet.ts index 48387fc06e..7343d37066 100644 --- a/src/voice/RecorderWorklet.ts +++ b/src/voice/RecorderWorklet.ts @@ -25,10 +25,6 @@ declare const currentTime: number; class MxVoiceWorklet extends AudioWorkletProcessor { private nextAmplitudeSecond = 0; - constructor() { - super(); - } - process(inputs, outputs, parameters) { // We only fire amplitude updates once a second to avoid flooding the recording instance // with useless data. Much of the data would end up discarded, so we ratelimit ourselves