From 449e028bbd1d537ded2cf21d2ba4581529153e67 Mon Sep 17 00:00:00 2001
From: Travis Ralston <travpc@gmail.com>
Date: Wed, 24 Mar 2021 23:31:02 -0600
Subject: [PATCH] Actually use a waveform instead of the frequency data

---
 res/css/_components.scss                      |  2 +-
 .../{_FrequencyBars.scss => _Waveform.scss}   |  4 +-
 .../views/rooms/VoiceRecordComposerTile.tsx   |  8 +--
 .../views/voice_messages/FrequencyBars.tsx    | 58 -----------------
 .../voice_messages/LiveRecordingWaveform.tsx  | 64 +++++++++++++++++++
 .../views/voice_messages/Waveform.tsx         | 48 ++++++++++++++
 src/voice/VoiceRecorder.ts                    | 55 +++++++++++-----
 7 files changed, 159 insertions(+), 80 deletions(-)
 rename res/css/views/voice_messages/{_FrequencyBars.scss => _Waveform.scss} (95%)
 delete mode 100644 src/components/views/voice_messages/FrequencyBars.tsx
 create mode 100644 src/components/views/voice_messages/LiveRecordingWaveform.tsx
 create mode 100644 src/components/views/voice_messages/Waveform.tsx
diff --git a/res/css/_components.scss b/res/css/_components.scss
index 33dc6e72cf..1eabd6f5c6 100644
--- a/res/css/_components.scss
+++ b/res/css/_components.scss
@@ -246,7 +246,7 @@
 @import "./views/toasts/_AnalyticsToast.scss";
 @import "./views/toasts/_NonUrgentEchoFailureToast.scss";
 @import "./views/verification/_VerificationShowSas.scss";
-@import "./views/voice_messages/_FrequencyBars.scss";
+@import "./views/voice_messages/_Waveform.scss";
 @import "./views/voip/_CallContainer.scss";
 @import "./views/voip/_CallView.scss";
 @import "./views/voip/_DialPad.scss";
diff --git a/res/css/views/voice_messages/_FrequencyBars.scss b/res/css/views/voice_messages/_Waveform.scss
similarity index 95%
rename from res/css/views/voice_messages/_FrequencyBars.scss
rename to res/css/views/voice_messages/_Waveform.scss
index b38cdfff92..23eedf2dbd 100644
--- a/res/css/views/voice_messages/_FrequencyBars.scss
+++ b/res/css/views/voice_messages/_Waveform.scss
@@ -14,14 +14,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-.mx_FrequencyBars {
+.mx_Waveform {
     position: relative;
     height: 30px; // tallest bar can only be 30px
 
     display: flex;
     align-items: center; // so the bars grow from the middle
 
-    .mx_FrequencyBars_bar {
+    .mx_Waveform_bar {
         width: 2px;
         margin-left: 1px;
         margin-right: 1px;
diff --git a/src/components/views/rooms/VoiceRecordComposerTile.tsx b/src/components/views/rooms/VoiceRecordComposerTile.tsx
index c57fc79eeb..061daab915 100644
--- a/src/components/views/rooms/VoiceRecordComposerTile.tsx
+++ b/src/components/views/rooms/VoiceRecordComposerTile.tsx
@@ -21,7 +21,7 @@ import {VoiceRecorder} from "../../../voice/VoiceRecorder";
 import {Room} from "matrix-js-sdk/src/models/room";
 import {MatrixClientPeg} from "../../../MatrixClientPeg";
 import classNames from "classnames";
-import FrequencyBars from "../voice_messages/FrequencyBars";
+import LiveRecordingWaveform from "../voice_messages/LiveRecordingWaveform";
 
 interface IProps {
     room: Room;
@@ -68,16 +68,16 @@ export default class VoiceRecordComposerTile extends React.PureComponent<IProps,
             'mx_VoiceRecordComposerTile_stop': !!this.state.recorder,
         });
 
-        let bars = null;
+        let waveform = null;
         let tooltip = _t("Record a voice message");
         if (!!this.state.recorder) {
             // TODO: @@ TravisR: Change to match behaviour
             tooltip = _t("Stop & send recording");
-            bars = <FrequencyBars recorder={this.state.recorder} />;
+            waveform = <LiveRecordingWaveform recorder={this.state.recorder} />;
         }
 
         return (<>
-            {bars}
+            {waveform}
             <AccessibleTooltipButton
                 className={classes}
                 onClick={this.onStartStopVoiceMessage}
diff --git a/src/components/views/voice_messages/FrequencyBars.tsx b/src/components/views/voice_messages/FrequencyBars.tsx
deleted file mode 100644
index 73ea7bc862..0000000000
--- a/src/components/views/voice_messages/FrequencyBars.tsx
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-Copyright 2021 The Matrix.org Foundation C.I.C.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-import React from "react";
-import {IFrequencyPackage, VoiceRecorder} from "../../../voice/VoiceRecorder";
-import {replaceableComponent} from "../../../utils/replaceableComponent";
-import {arrayFastResample, arraySeed} from "../../../utils/arrays";
-import {percentageOf} from "../../../utils/numbers";
-
-interface IProps {
-    recorder: VoiceRecorder
-}
-
-interface IState {
-    heights: number[];
-}
-
-const DOWNSAMPLE_TARGET = 35; // number of bars
-
-@replaceableComponent("views.voice_messages.FrequencyBars")
-export default class FrequencyBars extends React.PureComponent<IProps, IState> {
-    public constructor(props) {
-        super(props);
-
-        this.state = {heights: arraySeed(0, DOWNSAMPLE_TARGET)};
-        this.props.recorder.frequencyData.onUpdate(this.onFrequencyData);
-    }
-
-    private onFrequencyData = (freq: IFrequencyPackage) => {
-        // We're downsampling from about 1024 points to about 35, so this function is fine (see docs/impl)
-        const bars = arrayFastResample(Array.from(freq.dbBars), DOWNSAMPLE_TARGET);
-        this.setState({
-            // Values are somewhat arbitrary, but help decide what shape the graph should be
-            heights: bars.map(b => percentageOf(b, -150, -70) * 100),
-        });
-    };
-
-    public render() {
-        return <div className='mx_FrequencyBars'>
-            {this.state.heights.map((h, i) => {
-                return <span key={i} style={{height: h + '%'}} className='mx_FrequencyBars_bar' />;
-            })}
-        </div>;
-    }
-}
diff --git a/src/components/views/voice_messages/LiveRecordingWaveform.tsx b/src/components/views/voice_messages/LiveRecordingWaveform.tsx
new file mode 100644
index 0000000000..506532744a
--- /dev/null
+++ b/src/components/views/voice_messages/LiveRecordingWaveform.tsx
@@ -0,0 +1,64 @@
+/*
+Copyright 2021 The Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import React from "react";
+import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder";
+import {replaceableComponent} from "../../../utils/replaceableComponent";
+import {arrayFastResample, arraySeed} from "../../../utils/arrays";
+import {clamp, percentageOf} from "../../../utils/numbers";
+import Waveform from "./Waveform";
+
+interface IProps {
+    recorder: VoiceRecorder; // must already be recording: its liveData is read on construction
+}
+
+interface IState {
+    heights: number[]; // one entry per bar, as percentages handed to the Waveform component
+}
+
+const DOWNSAMPLE_TARGET = 35; // number of bars we want
+
+/**
+ * Renders the live data of an in-progress recording as a bar waveform. The recorder
+ * must already be recording, as its liveData is subscribed to on construction.
+ */
+@replaceableComponent("views.voice_messages.LiveRecordingWaveform")
+export default class LiveRecordingWaveform extends React.PureComponent<IProps, IState> {
+    public constructor(props) {
+        super(props);
+
+        this.state = {heights: arraySeed(0, DOWNSAMPLE_TARGET)};
+        this.props.recorder.liveData.onUpdate(this.onRecordingUpdate);
+    }
+
+    private onRecordingUpdate = (update: IRecordingUpdate) => {
+        // The waveform and the downsample target are pretty close, so we should be fine to
+        // do this, despite the docs on arrayFastResample.
+        const bars = arrayFastResample(Array.from(update.waveform), DOWNSAMPLE_TARGET);
+        this.setState({
+            // The incoming data is between zero and one, but typically even screaming into a
+            // microphone won't send you over 0.6, so we cap the graph at 0.4: the average user
+            // still sees feedback and is perceived as peaking when talking "loudly".
+            // We multiply by 100 because the Waveform component wants percentages, and bound
+            // the result to 0-100 so samples outside of the cap cannot overflow the bars.
+            heights: bars.map(b => Math.min(100, Math.max(0, percentageOf(b, 0, 0.40) * 100))),
+        });
+    };
+
+    public render() {
+        return <Waveform heights={this.state.heights} />;
+    }
+}
diff --git a/src/components/views/voice_messages/Waveform.tsx b/src/components/views/voice_messages/Waveform.tsx
new file mode 100644
index 0000000000..9736db54d1
--- /dev/null
+++ b/src/components/views/voice_messages/Waveform.tsx
@@ -0,0 +1,48 @@
+/*
+Copyright 2021 The Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import React from "react";
+import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder";
+import {replaceableComponent} from "../../../utils/replaceableComponent";
+import {arrayFastResample, arraySeed} from "../../../utils/arrays";
+import {percentageOf} from "../../../utils/numbers";
+
+interface IProps {
+    heights: number[]; // percentages (0-100) of the container height; need not be whole numbers
+}
+
+interface IState {
+}
+
+/**
+ * Plain bar graph for audio waveforms. One bar is drawn per entry of the `heights`
+ * property, using the entry as the bar's height in percent (the mx_Waveform styles
+ * center the bars vertically). New properties simply re-render the bars.
+ */
+@replaceableComponent("views.voice_messages.Waveform")
+export default class Waveform extends React.PureComponent<IProps, IState> {
+    public constructor(props) {
+        super(props);
+    }
+
+    public render() {
+        const bars = this.props.heights.map((pct, idx) => (
+            <span key={idx} style={{height: pct + '%'}} className='mx_Waveform_bar' />
+        ));
+        // Bars are only identified by their position, hence the index keys above.
+        return <div className='mx_Waveform'>{bars}</div>;
+    }
+}
diff --git a/src/voice/VoiceRecorder.ts b/src/voice/VoiceRecorder.ts
index 4bdd0b0af3..a85c3acad3 100644
--- a/src/voice/VoiceRecorder.ts
+++ b/src/voice/VoiceRecorder.ts
@@ -25,10 +25,8 @@ const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose
 const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus.
 const FREQ_SAMPLE_RATE = 10; // Target rate of frequency data (samples / sec). We don't need this super often.
 
-export interface IFrequencyPackage {
-    dbBars: Float32Array;
-    dbMin: number;
-    dbMax: number;
+export interface IRecordingUpdate {
+    waveform: number[]; // floating points between 0 (low) and 1 (high).
 
     // TODO: @@ TravisR: Generalize this for a timing package?
 }
@@ -38,11 +36,11 @@ export class VoiceRecorder {
     private recorderContext: AudioContext;
     private recorderSource: MediaStreamAudioSourceNode;
     private recorderStream: MediaStream;
-    private recorderFreqNode: AnalyserNode;
+    private recorderFFT: AnalyserNode;
     private buffer = new Uint8Array(0);
     private mxc: string;
     private recording = false;
-    private observable: SimpleObservable<IFrequencyPackage>;
+    private observable: SimpleObservable<IRecordingUpdate>;
     private freqTimerId: number;
 
     public constructor(private client: MatrixClient) {
@@ -64,8 +62,16 @@ export class VoiceRecorder {
             sampleRate: SAMPLE_RATE, // once again, the browser will resample for us
         });
         this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream);
-        this.recorderFreqNode = this.recorderContext.createAnalyser();
-        this.recorderSource.connect(this.recorderFreqNode);
+        this.recorderFFT = this.recorderContext.createAnalyser();
+
+        // Shrink the analyser's window. The default fftSize is 2048, and it must be a power
+        // of two. We use 64 points because we happen to know that down the line we need fewer
+        // than that, but 32 would be too few. Larger sizes do not help us here: they only add
+        // resolution to the output of the FFT (frequency data), which we ignore, while giving
+        // us far more time domain samples than we can usefully display.
+        this.recorderFFT.fftSize = 64;
+
+        this.recorderSource.connect(this.recorderFFT);
         this.recorder = new Recorder({
             encoderPath, // magic from webpack
             encoderSampleRate: SAMPLE_RATE,
@@ -91,7 +97,7 @@ export class VoiceRecorder {
         };
     }
 
-    public get frequencyData(): SimpleObservable<IFrequencyPackage> {
+    public get liveData(): SimpleObservable<IRecordingUpdate> {
         if (!this.recording) throw new Error("No observable when not recording");
         return this.observable;
     }
@@ -121,16 +127,35 @@ export class VoiceRecorder {
         if (this.observable) {
             this.observable.close();
         }
-        this.observable = new SimpleObservable<IFrequencyPackage>();
+        this.observable = new SimpleObservable<IRecordingUpdate>();
         await this.makeRecorder();
         this.freqTimerId = setInterval(() => {
             if (!this.recording) return;
-            const data = new Float32Array(this.recorderFreqNode.frequencyBinCount);
-            this.recorderFreqNode.getFloatFrequencyData(data);
+
+            // The time domain is the input to the FFT, which means we use an array of the same
+            // size. The time domain is also known as the audio waveform. We're ignoring the
+            // output of the FFT here (frequency data) because we're not interested in it.
+            //
+            // We read the samples as unsigned bytes rather than floats: they are simple to
+            // work with and 8 bits of precision are plenty for drawing a handful of bars.
+            // Each byte is centered on 128 (silence); 0 and 255 are the two extremes.
+            const data = new Uint8Array(this.recorderFFT.fftSize);
+            this.recorderFFT.getByteTimeDomainData(data);
+
+            // Translate each byte into the loudness value promised by IRecordingUpdate: a
+            // float between 0 (silent) and 1 (as loud as can be represented). A sample is
+            // just as loud whether it swings above or below the 128 midpoint, so we measure
+            // the distance from the midpoint instead of using the raw byte value.
+            const translatedData: number[] = [];
+            for (let i = 0; i < data.length; i++) {
+                // Note that a plain `1 - (byte / 128)` is not enough: it goes negative for
+                // every byte above the midpoint, breaking the 0-1 range for half the swing.
+                // The distance from the midpoint maps 0 -> 1.0, 128 -> 0 and 255 -> ~0.99.
+                translatedData.push(Math.abs(data[i] - 128) / 128.0);
+            }
+
             this.observable.update({
-                dbBars: data,
-                dbMin: this.recorderFreqNode.minDecibels,
-                dbMax: this.recorderFreqNode.maxDecibels,
+                waveform: translatedData,
             });
         }, 1000 / FREQ_SAMPLE_RATE) as any as number; // XXX: Linter doesn't understand timer environment
         await this.recorder.start();