From df7b598ca7322a205dd5f14fe5a75db5722c761a Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@matrix.org>
Date: Thu, 13 May 2021 22:20:08 -0600
Subject: [PATCH] Improve visible waveform for voice messages

This tries to base the visible waveform on the amplitude range of actual speech, and clamps low-level noise to zero so the waveform doesn't have a perceptually noisy base.

In theory this better matches the overall voice message content.
---
 src/utils/arrays.ts       |  3 ++-
 src/voice/Playback.ts     | 30 +++++++++++++++++++++---------
 test/utils/arrays-test.ts |  8 ++++----
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/src/utils/arrays.ts b/src/utils/arrays.ts
index 56bce5b2da..e527f43c29 100644
--- a/src/utils/arrays.ts
+++ b/src/utils/arrays.ts
@@ -75,7 +75,8 @@ export function arraySmoothingResample(input: number[], points: number): number[
             for (let i = 1; i < input.length - 1; i += 2) {
                 const prevPoint = input[i - 1];
                 const nextPoint = input[i + 1];
-                const average = (prevPoint + nextPoint) / 2;
+                const currPoint = input[i];
+                const average = (prevPoint + nextPoint + currPoint) / 3;
                 samples.push(average);
             }
             input = samples;
diff --git a/src/voice/Playback.ts b/src/voice/Playback.ts
index 5488ed6b84..d67b75bd6b 100644
--- a/src/voice/Playback.ts
+++ b/src/voice/Playback.ts
@@ -15,12 +15,13 @@ limitations under the License.
 */
 
 import EventEmitter from "events";
-import {UPDATE_EVENT} from "../stores/AsyncStore";
-import {arrayFastResample, arrayRescale, arraySeed, arraySmoothingResample} from "../utils/arrays";
-import {SimpleObservable} from "matrix-widget-api";
-import {IDestroyable} from "../utils/IDestroyable";
-import {PlaybackClock} from "./PlaybackClock";
-import {createAudioContext, decodeOgg} from "./compat";
+import { UPDATE_EVENT } from "../stores/AsyncStore";
+import { arrayFastResample, arrayRescale, arraySeed, arraySmoothingResample } from "../utils/arrays";
+import { SimpleObservable } from "matrix-widget-api";
+import { IDestroyable } from "../utils/IDestroyable";
+import { PlaybackClock } from "./PlaybackClock";
+import { createAudioContext, decodeOgg } from "./compat";
+import { clamp } from "../utils/numbers";
 
 export enum PlaybackState {
     Decoding = "decoding",
@@ -33,9 +34,20 @@ export const PLAYBACK_WAVEFORM_SAMPLES = 39;
 const DEFAULT_WAVEFORM = arraySeed(0, PLAYBACK_WAVEFORM_SAMPLES);
 
 function makePlaybackWaveform(input: number[]): number[] {
-    // We use a smoothing resample to keep the rough shape of the waveform the user will be seeing. We
-    // then rescale so the user can see the waveform properly (loud noises == 100%).
-    return arrayRescale(arraySmoothingResample(input, PLAYBACK_WAVEFORM_SAMPLES), 0, 1);
+    // First, convert negative amplitudes to positive so we don't detect zero as "noisy".
+    const noiseWaveform = input.map(v => Math.abs(v));
+
+    // Next, we'll resample the waveform using a smoothing approach so we can keep the same rough shape.
+    // We also rescale the waveform to be 0-1 for the remaining function logic.
+    const resampled = arrayRescale(arraySmoothingResample(noiseWaveform, PLAYBACK_WAVEFORM_SAMPLES), 0, 1);
+
+    // Then, we'll clamp each amplitude to the 0.1-0.5 band to isolate actual speaking volumes within the
+    // rescaled waveform: quieter values are treated as noise, and most speech happens below the 0.5 mark.
+    const filtered = resampled.map(v => clamp(v, 0.1, 0.5));
+
+    // Finally, we'll rescale the filtered waveform (0.1-0.5 becomes 0-1 again) so the user sees something
+    // sensible. This is what we return to keep our contract of "values between zero and one".
+    return arrayRescale(filtered, 0, 1);
 }
 
 export class Playback extends EventEmitter implements IDestroyable {
diff --git a/test/utils/arrays-test.ts b/test/utils/arrays-test.ts
index b55de3b73b..5974915965 100644
--- a/test/utils/arrays-test.ts
+++ b/test/utils/arrays-test.ts
@@ -73,10 +73,10 @@ describe('arrays', () => {
             // we'd be feeding a thousand values in and seeing what a curve of 250 values looks like,
             // but that's not really feasible to manually verify accuracy.
             [
-                {input: [2, 2, 0, 2, 2, 0, 2, 2, 0], output: [1, 1, 2, 1]}, // Odd -> Even
-                {input: [2, 2, 0, 2, 2, 0, 2, 2, 0], output: [1, 1, 2]}, // Odd -> Odd
-                {input: [2, 2, 0, 2, 2, 0, 2, 2], output: [1, 1, 2]}, // Even -> Odd
-                {input: [2, 2, 0, 2, 2, 0, 2, 2], output: [1, 2]}, // Even -> Even
+                {input: [4, 4, 1, 4, 4, 1, 4, 4, 1], output: [3, 3, 3, 3]}, // Odd -> Even
+                {input: [4, 4, 1, 4, 4, 1, 4, 4, 1], output: [3, 3, 3]}, // Odd -> Odd
+                {input: [4, 4, 1, 4, 4, 1, 4, 4], output: [3, 3, 3]}, // Even -> Odd
+                {input: [4, 4, 1, 4, 4, 1, 4, 4], output: [3, 3]}, // Even -> Even
             ].forEach((c, i) => expectSample(i, c.input, c.output, true));
         });