From 449e028bbd1d537ded2cf21d2ba4581529153e67 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 24 Mar 2021 23:31:02 -0600 Subject: [PATCH] Actually use a waveform instead of the frequency data --- res/css/_components.scss | 2 +- .../{_FrequencyBars.scss => _Waveform.scss} | 4 +- .../views/rooms/VoiceRecordComposerTile.tsx | 8 +-- .../views/voice_messages/FrequencyBars.tsx | 58 ----------------- .../voice_messages/LiveRecordingWaveform.tsx | 64 +++++++++++++++++++ .../views/voice_messages/Waveform.tsx | 48 ++++++++++++++ src/voice/VoiceRecorder.ts | 55 +++++++++++----- 7 files changed, 159 insertions(+), 80 deletions(-) rename res/css/views/voice_messages/{_FrequencyBars.scss => _Waveform.scss} (95%) delete mode 100644 src/components/views/voice_messages/FrequencyBars.tsx create mode 100644 src/components/views/voice_messages/LiveRecordingWaveform.tsx create mode 100644 src/components/views/voice_messages/Waveform.tsx diff --git a/res/css/_components.scss b/res/css/_components.scss index 33dc6e72cf..1eabd6f5c6 100644 --- a/res/css/_components.scss +++ b/res/css/_components.scss @@ -246,7 +246,7 @@ @import "./views/toasts/_AnalyticsToast.scss"; @import "./views/toasts/_NonUrgentEchoFailureToast.scss"; @import "./views/verification/_VerificationShowSas.scss"; -@import "./views/voice_messages/_FrequencyBars.scss"; +@import "./views/voice_messages/_Waveform.scss"; @import "./views/voip/_CallContainer.scss"; @import "./views/voip/_CallView.scss"; @import "./views/voip/_DialPad.scss"; diff --git a/res/css/views/voice_messages/_FrequencyBars.scss b/res/css/views/voice_messages/_Waveform.scss similarity index 95% rename from res/css/views/voice_messages/_FrequencyBars.scss rename to res/css/views/voice_messages/_Waveform.scss index b38cdfff92..23eedf2dbd 100644 --- a/res/css/views/voice_messages/_FrequencyBars.scss +++ b/res/css/views/voice_messages/_Waveform.scss @@ -14,14 +14,14 @@ See the License for the specific language governing permissions and limitations 
under the License. */ -.mx_FrequencyBars { +.mx_Waveform { position: relative; height: 30px; // tallest bar can only be 30px display: flex; align-items: center; // so the bars grow from the middle - .mx_FrequencyBars_bar { + .mx_Waveform_bar { width: 2px; margin-left: 1px; margin-right: 1px; diff --git a/src/components/views/rooms/VoiceRecordComposerTile.tsx b/src/components/views/rooms/VoiceRecordComposerTile.tsx index c57fc79eeb..061daab915 100644 --- a/src/components/views/rooms/VoiceRecordComposerTile.tsx +++ b/src/components/views/rooms/VoiceRecordComposerTile.tsx @@ -21,7 +21,7 @@ import {VoiceRecorder} from "../../../voice/VoiceRecorder"; import {Room} from "matrix-js-sdk/src/models/room"; import {MatrixClientPeg} from "../../../MatrixClientPeg"; import classNames from "classnames"; -import FrequencyBars from "../voice_messages/FrequencyBars"; +import LiveRecordingWaveform from "../voice_messages/LiveRecordingWaveform"; interface IProps { room: Room; @@ -68,16 +68,16 @@ export default class VoiceRecordComposerTile extends React.PureComponent; + waveform = ; } return (<> - {bars} + {waveform} { - public constructor(props) { - super(props); - - this.state = {heights: arraySeed(0, DOWNSAMPLE_TARGET)}; - this.props.recorder.frequencyData.onUpdate(this.onFrequencyData); - } - - private onFrequencyData = (freq: IFrequencyPackage) => { - // We're downsampling from about 1024 points to about 35, so this function is fine (see docs/impl) - const bars = arrayFastResample(Array.from(freq.dbBars), DOWNSAMPLE_TARGET); - this.setState({ - // Values are somewhat arbitrary, but help decide what shape the graph should be - heights: bars.map(b => percentageOf(b, -150, -70) * 100), - }); - }; - - public render() { - return
- {this.state.heights.map((h, i) => { - return ; - })} -
; - } -} diff --git a/src/components/views/voice_messages/LiveRecordingWaveform.tsx b/src/components/views/voice_messages/LiveRecordingWaveform.tsx new file mode 100644 index 0000000000..506532744a --- /dev/null +++ b/src/components/views/voice_messages/LiveRecordingWaveform.tsx @@ -0,0 +1,64 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import React from "react"; +import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder"; +import {replaceableComponent} from "../../../utils/replaceableComponent"; +import {arrayFastResample, arraySeed} from "../../../utils/arrays"; +import {clamp, percentageOf} from "../../../utils/numbers"; +import Waveform from "./Waveform"; + +interface IProps { + recorder: VoiceRecorder; +} + +interface IState { + heights: number[]; +} + +const DOWNSAMPLE_TARGET = 35; // number of bars we want + +/** + * A waveform which shows the waveform of a live recording + */ +@replaceableComponent("views.voice_messages.LiveRecordingWaveform") +export default class LiveRecordingWaveform extends React.PureComponent { + public constructor(props) { + super(props); + + this.state = {heights: arraySeed(0, DOWNSAMPLE_TARGET)}; + this.props.recorder.liveData.onUpdate(this.onRecordingUpdate); + } + + private onRecordingUpdate = (update: IRecordingUpdate) => { + // The waveform and the downsample target are pretty close, so we should be fine to + // do this, despite the docs on arrayFastResample. 
+ const bars = arrayFastResample(Array.from(update.waveform), DOWNSAMPLE_TARGET); + this.setState({ + // The incoming data is between zero and one, but typically even screaming into a + // microphone won't send you over 0.6, so we "cap" the graph at about 0.4 for a + // point where the average user can still see feedback and be perceived as peaking + // when talking "loudly". + // + // We multiply by 100 because the Waveform component wants values in 0-100 (percentages) + heights: bars.map(b => percentageOf(b, 0, 0.40) * 100), + }); + }; + + public render() { + return ; + } +} diff --git a/src/components/views/voice_messages/Waveform.tsx b/src/components/views/voice_messages/Waveform.tsx new file mode 100644 index 0000000000..9736db54d1 --- /dev/null +++ b/src/components/views/voice_messages/Waveform.tsx @@ -0,0 +1,48 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import React from "react"; +import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder"; +import {replaceableComponent} from "../../../utils/replaceableComponent"; +import {arrayFastResample, arraySeed} from "../../../utils/arrays"; +import {percentageOf} from "../../../utils/numbers"; + +interface IProps { + heights: number[]; // percentages as integers (0-100) +} + +interface IState { +} + +/** + * A simple waveform component. This renders bars (centered vertically) for each + * height provided in the component properties. 
Updating the properties will update + * the rendered waveform. + */ +@replaceableComponent("views.voice_messages.Waveform") +export default class Waveform extends React.PureComponent { + public constructor(props) { + super(props); + } + + public render() { + return
+ {this.props.heights.map((h, i) => { + return ; + })} +
; + } +} diff --git a/src/voice/VoiceRecorder.ts b/src/voice/VoiceRecorder.ts index 4bdd0b0af3..a85c3acad3 100644 --- a/src/voice/VoiceRecorder.ts +++ b/src/voice/VoiceRecorder.ts @@ -25,10 +25,8 @@ const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus. const FREQ_SAMPLE_RATE = 10; // Target rate of frequency data (samples / sec). We don't need this super often. -export interface IFrequencyPackage { - dbBars: Float32Array; - dbMin: number; - dbMax: number; +export interface IRecordingUpdate { + waveform: number[]; // floating points between 0 (low) and 1 (high). // TODO: @@ TravisR: Generalize this for a timing package? } @@ -38,11 +36,11 @@ export class VoiceRecorder { private recorderContext: AudioContext; private recorderSource: MediaStreamAudioSourceNode; private recorderStream: MediaStream; - private recorderFreqNode: AnalyserNode; + private recorderFFT: AnalyserNode; private buffer = new Uint8Array(0); private mxc: string; private recording = false; - private observable: SimpleObservable; + private observable: SimpleObservable; private freqTimerId: number; public constructor(private client: MatrixClient) { @@ -64,8 +62,16 @@ export class VoiceRecorder { sampleRate: SAMPLE_RATE, // once again, the browser will resample for us }); this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream); - this.recorderFreqNode = this.recorderContext.createAnalyser(); - this.recorderSource.connect(this.recorderFreqNode); + this.recorderFFT = this.recorderContext.createAnalyser(); + + // Bring the FFT time domain down a bit. The default is 2048, and this must be a power + // of two. We use 64 points because we happen to know down the line we need less than + // that, but 32 would be too few. 
Large numbers are not helpful here and do not add + // precision: they introduce higher precision outputs of the FFT (frequency data), but + // it makes the time domain less than helpful. + this.recorderFFT.fftSize = 64; + + this.recorderSource.connect(this.recorderFFT); this.recorder = new Recorder({ encoderPath, // magic from webpack encoderSampleRate: SAMPLE_RATE, @@ -91,7 +97,7 @@ export class VoiceRecorder { }; } - public get frequencyData(): SimpleObservable { + public get liveData(): SimpleObservable { if (!this.recording) throw new Error("No observable when not recording"); return this.observable; } @@ -121,16 +127,35 @@ export class VoiceRecorder { if (this.observable) { this.observable.close(); } - this.observable = new SimpleObservable(); + this.observable = new SimpleObservable(); await this.makeRecorder(); this.freqTimerId = setInterval(() => { if (!this.recording) return; - const data = new Float32Array(this.recorderFreqNode.frequencyBinCount); - this.recorderFreqNode.getFloatFrequencyData(data); + + // The time domain is the input to the FFT, which means we use an array of the same + // size. The time domain is also known as the audio waveform. We're ignoring the + // output of the FFT here (frequency data) because we're not interested in it. + // + // We use bytes out of the analyser because floats have weird precision problems + // and are slightly more difficult to work with. The bytes are easy to work with, + // which is why we pick them (they're also more precise, but we care less about that). + const data = new Uint8Array(this.recorderFFT.fftSize); + this.recorderFFT.getByteTimeDomainData(data); + + // Because we're dealing with a uint array we need to do math a bit differently. + // If we just `Array.from()` the uint array, we end up with 1s and 0s, which aren't + // what we're after. 
Instead, we have to use a bit of manual looping to correctly end + // up with the right values. + const translatedData: number[] = []; + for (let i = 0; i < data.length; i++) { + // All we're doing here is inverting the amplitude and putting the metric somewhere + // between -1 and 1 (a byte of 128 is silence and maps to 0). Without the inversion, lower values are "louder", which is + // not super helpful. + translatedData.push(1 - (data[i] / 128.0)); + } + this.observable.update({ - dbBars: data, - dbMin: this.recorderFreqNode.minDecibels, - dbMax: this.recorderFreqNode.maxDecibels, + waveform: translatedData, }); }, 1000 / FREQ_SAMPLE_RATE) as any as number; // XXX: Linter doesn't understand timer environment await this.recorder.start();