From 61730f2f881292bfcdf5becd249c0fc4c45edb1e Mon Sep 17 00:00:00 2001
From: Travis Ralston
Date: Mon, 19 Apr 2021 23:05:06 -0600
Subject: [PATCH] Populate waveform data on voice message event

---
 .../views/rooms/VoiceRecordComposerTile.tsx |  8 ++++-
 src/utils/arrays.ts                         |  2 +-
 src/voice/RecorderWorklet.ts                | 36 ++++++++++++++++++-
 src/voice/VoiceRecording.ts                 | 12 +++++++
 src/voice/consts.ts                         |  8 +++++
 5 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/src/components/views/rooms/VoiceRecordComposerTile.tsx b/src/components/views/rooms/VoiceRecordComposerTile.tsx
index f46b7c6311..05beb3a0ca 100644
--- a/src/components/views/rooms/VoiceRecordComposerTile.tsx
+++ b/src/components/views/rooms/VoiceRecordComposerTile.tsx
@@ -77,7 +77,13 @@ export default class VoiceRecordComposerTile extends React.PureComponent
+                waveform: this.state.recorder.finalWaveform.map(v => Math.round(v * 1024)),
+            },
         });
         await VoiceRecordingStore.instance.disposeRecording();
diff --git a/src/utils/arrays.ts b/src/utils/arrays.ts
index 52308937f7..8ab66dfb29 100644
--- a/src/utils/arrays.ts
+++ b/src/utils/arrays.ts
@@ -54,7 +54,7 @@ export function arraySeed<T>(val: T, length: number): T[] {
  * @param a The array to clone. Must be defined.
  * @returns A copy of the array.
  */
-export function arrayFastClone(a: any[]): any[] {
+export function arrayFastClone<T>(a: T[]): T[] {
     return a.slice(0, a.length);
 }
diff --git a/src/voice/RecorderWorklet.ts b/src/voice/RecorderWorklet.ts
index 11f24fce4c..8d6f1e9627 100644
--- a/src/voice/RecorderWorklet.ts
+++ b/src/voice/RecorderWorklet.ts
@@ -14,7 +14,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-import {ITimingPayload, PayloadEvent, WORKLET_NAME} from "./consts";
+import {IAmplitudePayload, ITimingPayload, PayloadEvent, WORKLET_NAME} from "./consts";
+import {percentageOf} from "../utils/numbers";
 
 // from AudioWorkletGlobalScope: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletGlobalScope
 declare const currentTime: number;
@@ -22,12 +23,45 @@ declare const currentFrame: number;
 declare const sampleRate: number;
 
 class MxVoiceWorklet extends AudioWorkletProcessor {
+    private nextAmplitudeSecond = 0;
+
     constructor() {
         super();
     }
 
     process(inputs, outputs, parameters) {
+        // We only fire amplitude updates once a second to avoid flooding the recording instance
+        // with useless data. Much of the data would end up discarded, so we ratelimit ourselves
+        // here.
+        const currentSecond = Math.round(currentTime);
+        if (currentSecond === this.nextAmplitudeSecond) {
+            // We're expecting exactly one mono input source, so just grab the very first frame of
+            // samples for the analysis.
+            const monoChan = inputs[0][0];
+
+            // The amplitude of the frame's samples is effectively the loudness of the frame. This
+            // translates into a bar which can be rendered as part of the whole recording clip's
+            // waveform.
+            //
+            // We translate the amplitude down to 0-1 for sanity's sake.
+            const minVal = monoChan.reduce((m, v) => Math.min(m, v), Number.MAX_SAFE_INTEGER);
+            const maxVal = monoChan.reduce((m, v) => Math.max(m, v), Number.MIN_SAFE_INTEGER);
+            const amplitude = percentageOf(maxVal, -1, 1) - percentageOf(minVal, -1, 1);
+
+            this.port.postMessage(<IAmplitudePayload>{
+                ev: PayloadEvent.AmplitudeMark,
+                amplitude: amplitude,
+                forSecond: currentSecond,
+            });
+            this.nextAmplitudeSecond++;
+        }
+
+        // We mostly use this worklet to fire regular clock updates through to components
         this.port.postMessage({ev: PayloadEvent.Timekeep, timeSeconds: currentTime});
+
+        // We're supposed to return false when we're "done" with the audio clip, but seeing as
+        // we are acting as a passive processor we are never truly "done". The browser will clean
+        // us up when it is done with us.
         return true;
     }
 }
diff --git a/src/voice/VoiceRecording.ts b/src/voice/VoiceRecording.ts
index 8e506c235c..716936f636 100644
--- a/src/voice/VoiceRecording.ts
+++ b/src/voice/VoiceRecording.ts
@@ -24,6 +24,7 @@ import EventEmitter from "events";
 import {IDestroyable} from "../utils/IDestroyable";
 import {Singleflight} from "../utils/Singleflight";
 import {PayloadEvent, WORKLET_NAME} from "./consts";
+import {arrayFastClone} from "../utils/arrays";
 
 const CHANNELS = 1; // stereo isn't important
 const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
@@ -55,11 +56,16 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
     private mxc: string;
     private recording = false;
     private observable: SimpleObservable;
+    private amplitudes: number[] = []; // at each second mark, generated
 
     public constructor(private client: MatrixClient) {
         super();
     }
 
+    public get finalWaveform(): number[] {
+        return arrayFastClone(this.amplitudes);
+    }
+
     public get contentType(): string {
         return "audio/ogg";
     }
@@ -114,6 +120,12 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
                 case PayloadEvent.Timekeep:
                     this.processAudioUpdate(ev.data['timeSeconds']);
                     break;
+                case PayloadEvent.AmplitudeMark:
+                    // Sanity check to make sure we're adding about one sample per second
+                    if (ev.data['forSecond'] === this.amplitudes.length) {
+                        this.amplitudes.push(ev.data['amplitude']);
+                    }
+                    break;
             }
         };
diff --git a/src/voice/consts.ts b/src/voice/consts.ts
index dbd3b574f4..c530c60f0b 100644
--- a/src/voice/consts.ts
+++ b/src/voice/consts.ts
@@ -18,6 +18,7 @@ export const WORKLET_NAME = "mx-voice-worklet";
 
 export enum PayloadEvent {
     Timekeep = "timekeep",
+    AmplitudeMark = "amplitude_mark",
 }
 
 export interface IPayload {
@@ -25,5 +26,12 @@
 }
 
 export interface ITimingPayload extends IPayload {
+    ev: PayloadEvent.Timekeep;
     timeSeconds: number;
 }
+
+export interface IAmplitudePayload extends IPayload {
+    ev: PayloadEvent.AmplitudeMark;
+    forSecond: number;
+    amplitude: number;
+}
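
For reviewers, a minimal sketch of the per-second amplitude calculation the worklet performs above, written as a standalone function. It assumes percentageOf(val, min, max) from src/utils/numbers.ts is a linear map of val from [min, max] onto [0, 1]; the helper name frameAmplitude is illustrative only, not part of the patch.

    // Assumed behaviour of percentageOf (see src/utils/numbers.ts): a linear map of
    // `val` from [min, max] onto [0, 1].
    function percentageOf(val: number, min: number, max: number): number {
        return (val - min) / (max - min);
    }

    // Illustrative helper mirroring the worklet's calculation: PCM samples are floats
    // in [-1, 1], and the "amplitude" is the frame's peak-to-peak swing scaled to [0, 1].
    function frameAmplitude(monoChan: Float32Array): number {
        let minVal = Number.MAX_SAFE_INTEGER;
        let maxVal = Number.MIN_SAFE_INTEGER;
        for (const v of monoChan) {
            minVal = Math.min(minVal, v);
            maxVal = Math.max(maxVal, v);
        }
        // percentageOf(maxVal, -1, 1) - percentageOf(minVal, -1, 1) === (maxVal - minVal) / 2,
        // so a full-scale frame yields 1.0 and a silent frame yields 0.0.
        return percentageOf(maxVal, -1, 1) - percentageOf(minVal, -1, 1);
    }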
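
The composer tile then reads the recorder's finalWaveform (one 0-1 value per second) and rounds each value onto a 0-1024 integer scale before placing it on the event, per the Math.round(v * 1024) fragment in the first hunk. A small worked example; the function name encodeWaveform is illustrative only, and the exact event field layout is outside this sketch.

    // Converts the recorder's per-second amplitudes (floats in [0, 1]) into the integer
    // form carried on the voice message event. The 1024 multiplier comes from the patch;
    // the surrounding event content shape is assumed for illustration.
    function encodeWaveform(finalWaveform: number[]): number[] {
        return finalWaveform.map(v => Math.round(v * 1024));
    }

    // e.g. encodeWaveform([0, 0.25, 0.5, 1]) === [0, 256, 512, 1024]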