From df7b598ca7322a205dd5f14fe5a75db5722c761a Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 13 May 2021 22:20:08 -0600 Subject: [PATCH] Improve visible waveform for voice messages This tries to prioritize actual voice to decide the waveform, and clamps noise to zero to ensure the waveform doesn't have a perceptually noisy base. In theory this better matches the overall voice message content. --- src/utils/arrays.ts | 3 ++- src/voice/Playback.ts | 30 +++++++++++++++++++++--------- test/utils/arrays-test.ts | 8 ++++---- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/utils/arrays.ts b/src/utils/arrays.ts index 56bce5b2da..e527f43c29 100644 --- a/src/utils/arrays.ts +++ b/src/utils/arrays.ts @@ -75,7 +75,8 @@ export function arraySmoothingResample(input: number[], points: number): number[ for (let i = 1; i < input.length - 1; i += 2) { const prevPoint = input[i - 1]; const nextPoint = input[i + 1]; - const average = (prevPoint + nextPoint) / 2; + const currPoint = input[i]; + const average = (prevPoint + nextPoint + currPoint) / 3; samples.push(average); } input = samples; diff --git a/src/voice/Playback.ts b/src/voice/Playback.ts index 5488ed6b84..d67b75bd6b 100644 --- a/src/voice/Playback.ts +++ b/src/voice/Playback.ts @@ -15,12 +15,13 @@ limitations under the License. 
*/ import EventEmitter from "events"; -import {UPDATE_EVENT} from "../stores/AsyncStore"; -import {arrayFastResample, arrayRescale, arraySeed, arraySmoothingResample} from "../utils/arrays"; -import {SimpleObservable} from "matrix-widget-api"; -import {IDestroyable} from "../utils/IDestroyable"; -import {PlaybackClock} from "./PlaybackClock"; -import {createAudioContext, decodeOgg} from "./compat"; +import { UPDATE_EVENT } from "../stores/AsyncStore"; +import { arrayFastResample, arrayRescale, arraySeed, arraySmoothingResample } from "../utils/arrays"; +import { SimpleObservable } from "matrix-widget-api"; +import { IDestroyable } from "../utils/IDestroyable"; +import { PlaybackClock } from "./PlaybackClock"; +import { createAudioContext, decodeOgg } from "./compat"; +import { clamp } from "../utils/numbers"; export enum PlaybackState { Decoding = "decoding", @@ -33,9 +34,20 @@ export const PLAYBACK_WAVEFORM_SAMPLES = 39; const DEFAULT_WAVEFORM = arraySeed(0, PLAYBACK_WAVEFORM_SAMPLES); function makePlaybackWaveform(input: number[]): number[] { - // We use a smoothing resample to keep the rough shape of the waveform the user will be seeing. We - // then rescale so the user can see the waveform properly (loud noises == 100%). - return arrayRescale(arraySmoothingResample(input, PLAYBACK_WAVEFORM_SAMPLES), 0, 1); + // First, convert negative amplitudes to positive so we don't detect zero as "noisy". + const noiseWaveform = input.map(v => Math.abs(v)); + + // Next, we'll resample the waveform using a smoothing approach so we can keep the same rough shape. + // We also rescale the waveform to be 0-1 for the remaining function logic. + const resampled = arrayRescale(arraySmoothingResample(noiseWaveform, PLAYBACK_WAVEFORM_SAMPLES), 0, 1); + + // Then, we'll clamp amplitudes to a floor and ceiling to isolate actual speaking volumes within the rescaled + // waveform. Most speech happens below the 0.5 mark. 
+ const filtered = resampled.map(v => clamp(v, 0.1, 0.5)); + + // Finally, we'll rescale the filtered waveform (0.1-0.5 becomes 0-1 again) so the user sees something + // sensible. This is what we return to keep our contract of "values between zero and one". + return arrayRescale(filtered, 0, 1); } export class Playback extends EventEmitter implements IDestroyable { diff --git a/test/utils/arrays-test.ts b/test/utils/arrays-test.ts index b55de3b73b..5974915965 100644 --- a/test/utils/arrays-test.ts +++ b/test/utils/arrays-test.ts @@ -73,10 +73,10 @@ describe('arrays', () => { // we'd be feeding a thousand values in and seeing what a curve of 250 values looks like, // but that's not really feasible to manually verify accuracy. [ - {input: [2, 2, 0, 2, 2, 0, 2, 2, 0], output: [1, 1, 2, 1]}, // Odd -> Even - {input: [2, 2, 0, 2, 2, 0, 2, 2, 0], output: [1, 1, 2]}, // Odd -> Odd - {input: [2, 2, 0, 2, 2, 0, 2, 2], output: [1, 1, 2]}, // Even -> Odd - {input: [2, 2, 0, 2, 2, 0, 2, 2], output: [1, 2]}, // Even -> Even + {input: [4, 4, 1, 4, 4, 1, 4, 4, 1], output: [3, 3, 3, 3]}, // Odd -> Even + {input: [4, 4, 1, 4, 4, 1, 4, 4, 1], output: [3, 3, 3]}, // Odd -> Odd + {input: [4, 4, 1, 4, 4, 1, 4, 4], output: [3, 3, 3]}, // Even -> Odd + {input: [4, 4, 1, 4, 4, 1, 4, 4], output: [3, 3]}, // Even -> Even ].forEach((c, i) => expectSample(i, c.input, c.output, true)); });