Improve visible waveform for voice messages

This tries to prioritize actual voice to decide the waveform, and clamps noise to zero to ensure the waveform doesn't have a perceptually noisy base. In theory this better matches the overall voice message content.
2021-05-13 22:20:08 -06:00 · 2021-05-13 22:20:08 -06:00 · df7b598ca7
parent e38d27f64e
commit df7b598ca7
3 changed files with 27 additions and 14 deletions
--- a/src/utils/arrays.ts
+++ b/src/utils/arrays.ts
@@ -75,7 +75,8 @@ export function arraySmoothingResample(input: number[], points: number): number[
            for (let i = 1; i < input.length - 1; i += 2) {
                const prevPoint = input[i - 1];
                const nextPoint = input[i + 1];
-                const average = (prevPoint + nextPoint) / 2;
+                const currPoint = input[i];
+                const average = (prevPoint + nextPoint + currPoint) / 3;
                samples.push(average);
            }
            input = samples;
--- a/src/voice/Playback.ts
+++ b/src/voice/Playback.ts
@@ -15,12 +15,13 @@ limitations under the License.
 */

 import EventEmitter from "events";
-import {UPDATE_EVENT} from "../stores/AsyncStore";
-import {arrayFastResample, arrayRescale, arraySeed, arraySmoothingResample} from "../utils/arrays";
-import {SimpleObservable} from "matrix-widget-api";
-import {IDestroyable} from "../utils/IDestroyable";
-import {PlaybackClock} from "./PlaybackClock";
-import {createAudioContext, decodeOgg} from "./compat";
+import { UPDATE_EVENT } from "../stores/AsyncStore";
+import { arrayFastResample, arrayRescale, arraySeed, arraySmoothingResample } from "../utils/arrays";
+import { SimpleObservable } from "matrix-widget-api";
+import { IDestroyable } from "../utils/IDestroyable";
+import { PlaybackClock } from "./PlaybackClock";
+import { createAudioContext, decodeOgg } from "./compat";
+import { clamp } from "../utils/numbers";

 export enum PlaybackState {
    Decoding = "decoding",
@@ -33,9 +34,20 @@ export const PLAYBACK_WAVEFORM_SAMPLES = 39;
 const DEFAULT_WAVEFORM = arraySeed(0, PLAYBACK_WAVEFORM_SAMPLES);

 function makePlaybackWaveform(input: number[]): number[] {
-    // We use a smoothing resample to keep the rough shape of the waveform the user will be seeing. We
-    // then rescale so the user can see the waveform properly (loud noises == 100%).
-    return arrayRescale(arraySmoothingResample(input, PLAYBACK_WAVEFORM_SAMPLES), 0, 1);
+    // First, convert negative amplitudes to positive so we don't detect zero as "noisy".
+    const noiseWaveform = input.map(v => Math.abs(v));
+
+    // Next, we'll resample the waveform using a smoothing approach so we can keep the same rough shape.
+    // We also rescale the waveform to be 0-1 for the remaining function logic.
+    const resampled = arrayRescale(arraySmoothingResample(noiseWaveform, PLAYBACK_WAVEFORM_SAMPLES), 0, 1);
+
+    // Then, we'll clamp the amplitudes to the 0.1-0.5 band (a floor and ceiling on loudness, not a frequency
+    // filter) to isolate actual speaking volumes within the rescaled waveform. Most speech happens below the 0.5 mark.
+    const filtered = resampled.map(v => clamp(v, 0.1, 0.5));
+
+    // Finally, we'll rescale the filtered waveform (0.1-0.5 becomes 0-1 again) so the user sees something
+    // sensible. This is what we return to keep our contract of "values between zero and one".
+    return arrayRescale(filtered, 0, 1);
 }

 export class Playback extends EventEmitter implements IDestroyable {
--- a/test/utils/arrays-test.ts
+++ b/test/utils/arrays-test.ts
@@ -73,10 +73,10 @@ describe('arrays', () => {
            // we'd be feeding a thousand values in and seeing what a curve of 250 values looks like,
            // but that's not really feasible to manually verify accuracy.
            [
-                {input: [2, 2, 0, 2, 2, 0, 2, 2, 0], output: [1, 1, 2, 1]}, // Odd -> Even
-                {input: [2, 2, 0, 2, 2, 0, 2, 2, 0], output: [1, 1, 2]}, // Odd -> Odd
-                {input: [2, 2, 0, 2, 2, 0, 2, 2], output: [1, 1, 2]}, // Even -> Odd
-                {input: [2, 2, 0, 2, 2, 0, 2, 2], output: [1, 2]}, // Even -> Even
+                {input: [4, 4, 1, 4, 4, 1, 4, 4, 1], output: [3, 3, 3, 3]}, // Odd -> Even
+                {input: [4, 4, 1, 4, 4, 1, 4, 4, 1], output: [3, 3, 3]}, // Odd -> Odd
+                {input: [4, 4, 1, 4, 4, 1, 4, 4], output: [3, 3, 3]}, // Even -> Odd
+                {input: [4, 4, 1, 4, 4, 1, 4, 4], output: [3, 3]}, // Even -> Even
            ].forEach((c, i) => expectSample(i, c.input, c.output, true));
        });