diff --git a/res/css/_components.scss b/res/css/_components.scss
index 33dc6e72cf..1eabd6f5c6 100644
--- a/res/css/_components.scss
+++ b/res/css/_components.scss
@@ -246,7 +246,7 @@
 @import "./views/toasts/_AnalyticsToast.scss";
 @import "./views/toasts/_NonUrgentEchoFailureToast.scss";
 @import "./views/verification/_VerificationShowSas.scss";
-@import "./views/voice_messages/_FrequencyBars.scss";
+@import "./views/voice_messages/_Waveform.scss";
 @import "./views/voip/_CallContainer.scss";
 @import "./views/voip/_CallView.scss";
 @import "./views/voip/_DialPad.scss";
diff --git a/res/css/views/voice_messages/_FrequencyBars.scss b/res/css/views/voice_messages/_Waveform.scss
similarity index 95%
rename from res/css/views/voice_messages/_FrequencyBars.scss
rename to res/css/views/voice_messages/_Waveform.scss
index b38cdfff92..23eedf2dbd 100644
--- a/res/css/views/voice_messages/_FrequencyBars.scss
+++ b/res/css/views/voice_messages/_Waveform.scss
@@ -14,14 +14,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-.mx_FrequencyBars {
+.mx_Waveform {
     position: relative;
     height: 30px; // tallest bar can only be 30px
 
     display: flex;
     align-items: center; // so the bars grow from the middle
 
-    .mx_FrequencyBars_bar {
+    .mx_Waveform_bar {
         width: 2px;
         margin-left: 1px;
         margin-right: 1px;
diff --git a/src/components/views/rooms/VoiceRecordComposerTile.tsx b/src/components/views/rooms/VoiceRecordComposerTile.tsx
index c57fc79eeb..061daab915 100644
--- a/src/components/views/rooms/VoiceRecordComposerTile.tsx
+++ b/src/components/views/rooms/VoiceRecordComposerTile.tsx
@@ -21,7 +21,7 @@ import {VoiceRecorder} from "../../../voice/VoiceRecorder";
 import {Room} from "matrix-js-sdk/src/models/room";
 import {MatrixClientPeg} from "../../../MatrixClientPeg";
 import classNames from "classnames";
-import FrequencyBars from "../voice_messages/FrequencyBars";
+import LiveRecordingWaveform from "../voice_messages/LiveRecordingWaveform";
 
 interface IProps {
     room: Room;
@@ -68,16 +68,16 @@ export default class VoiceRecordComposerTile extends React.PureComponent<IProps,
             'mx_VoiceRecordComposerTile_stop': !!this.state.recorder,
         });
 
-        let bars = null;
+        let waveform = null;
         let tooltip = _t("Record a voice message");
        if (!!this.state.recorder) {
             // TODO: @@ TravisR: Change to match behaviour
             tooltip = _t("Stop & send recording");
-            bars = <FrequencyBars recorder={this.state.recorder} />;
+            waveform = <LiveRecordingWaveform recorder={this.state.recorder} />;
         }
 
         return (<>
-            {bars}
+            {waveform}
             <AccessibleTooltipButton
                 className={classes}
                 onClick={this.onStartStopVoiceMessage}
diff --git a/src/components/views/voice_messages/FrequencyBars.tsx b/src/components/views/voice_messages/FrequencyBars.tsx
deleted file mode 100644
index 73ea7bc862..0000000000
--- a/src/components/views/voice_messages/FrequencyBars.tsx
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-Copyright 2021 The Matrix.org Foundation C.I.C.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-import React from "react";
-import {IFrequencyPackage, VoiceRecorder} from "../../../voice/VoiceRecorder";
-import {replaceableComponent} from "../../../utils/replaceableComponent";
-import {arrayFastResample, arraySeed} from "../../../utils/arrays";
-import {percentageOf} from "../../../utils/numbers";
-
-interface IProps {
-    recorder: VoiceRecorder
-}
-
-interface IState {
-    heights: number[];
-}
-
-const DOWNSAMPLE_TARGET = 35; // number of bars
-
-@replaceableComponent("views.voice_messages.FrequencyBars")
-export default class FrequencyBars extends React.PureComponent<IProps, IState> {
-    public constructor(props) {
-        super(props);
-
-        this.state = {heights: arraySeed(0, DOWNSAMPLE_TARGET)};
-        this.props.recorder.frequencyData.onUpdate(this.onFrequencyData);
-    }
-
-    private onFrequencyData = (freq: IFrequencyPackage) => {
-        // We're downsampling from about 1024 points to about 35, so this function is fine (see docs/impl)
-        const bars = arrayFastResample(Array.from(freq.dbBars), DOWNSAMPLE_TARGET);
-        this.setState({
-            // Values are somewhat arbitrary, but help decide what shape the graph should be
-            heights: bars.map(b => percentageOf(b, -150, -70) * 100),
-        });
-    };
-
-    public render() {
-        return <div className='mx_FrequencyBars'>
-            {this.state.heights.map((h, i) => {
-                return <span key={i} style={{height: h + '%'}} className='mx_FrequencyBars_bar' />;
-            })}
-        </div>;
-    }
-}
diff --git a/src/components/views/voice_messages/LiveRecordingWaveform.tsx b/src/components/views/voice_messages/LiveRecordingWaveform.tsx
new file mode 100644
index 0000000000..506532744a
--- /dev/null
+++ b/src/components/views/voice_messages/LiveRecordingWaveform.tsx
@@ -0,0 +1,64 @@
+/*
+Copyright 2021 The Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import React from "react";
+import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder";
+import {replaceableComponent} from "../../../utils/replaceableComponent";
+import {arrayFastResample, arraySeed} from "../../../utils/arrays";
+import {clamp, percentageOf} from "../../../utils/numbers";
+import Waveform from "./Waveform";
+
+interface IProps {
+    recorder: VoiceRecorder;
+}
+
+interface IState {
+    heights: number[];
+}
+
+const DOWNSAMPLE_TARGET = 35; // number of bars we want
+
+/**
+ * A waveform which shows the waveform of a live recording
+ */
+@replaceableComponent("views.voice_messages.LiveRecordingWaveform")
+export default class LiveRecordingWaveform extends React.PureComponent<IProps, IState> {
+    public constructor(props) {
+        super(props);
+
+        this.state = {heights: arraySeed(0, DOWNSAMPLE_TARGET)};
+        this.props.recorder.liveData.onUpdate(this.onRecordingUpdate);
+    }
+
+    private onRecordingUpdate = (update: IRecordingUpdate) => {
+        // The waveform and the downsample target are pretty close, so we should be fine to
+        // do this, despite the docs on arrayFastResample.
+        const bars = arrayFastResample(Array.from(update.waveform), DOWNSAMPLE_TARGET);
+        this.setState({
+            // The incoming data is between zero and one, but typically even screaming into a
+            // microphone won't send you over 0.6, so we "cap" the graph at about 0.4 for a
+            // point where the average user can still see feedback and be perceived as peaking
+            // when talking "loudly".
+            //
+            // We multiply by 100 because the Waveform component wants values in 0-100 (percentages)
+            heights: bars.map(b => percentageOf(b, 0, 0.40) * 100),
+        });
+    };
+
+    public render() {
+        return <Waveform heights={this.state.heights} />;
+    }
+}
diff --git a/src/components/views/voice_messages/Waveform.tsx b/src/components/views/voice_messages/Waveform.tsx
new file mode 100644
index 0000000000..9736db54d1
--- /dev/null
+++ b/src/components/views/voice_messages/Waveform.tsx
@@ -0,0 +1,48 @@
+/*
+Copyright 2021 The Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import React from "react";
+import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder";
+import {replaceableComponent} from "../../../utils/replaceableComponent";
+import {arrayFastResample, arraySeed} from "../../../utils/arrays";
+import {percentageOf} from "../../../utils/numbers";
+
+interface IProps {
+    heights: number[]; // percentages as integers (0-100)
+}
+
+interface IState {
+}
+
+/**
+ * A simple waveform component. This renders bars (centered vertically) for each
+ * height provided in the component properties. Updating the properties will update
+ * the rendered waveform.
+ */
+@replaceableComponent("views.voice_messages.Waveform")
+export default class Waveform extends React.PureComponent<IProps, IState> {
+    public constructor(props) {
+        super(props);
+    }
+
+    public render() {
+        return <div className='mx_Waveform'>
+            {this.props.heights.map((h, i) => {
+                return <span key={i} style={{height: h + '%'}} className='mx_Waveform_bar' />;
+            })}
+        </div>;
+    }
+}
diff --git a/src/voice/VoiceRecorder.ts b/src/voice/VoiceRecorder.ts
index 4bdd0b0af3..a85c3acad3 100644
--- a/src/voice/VoiceRecorder.ts
+++ b/src/voice/VoiceRecorder.ts
@@ -25,10 +25,8 @@ const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose
 const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus.
 const FREQ_SAMPLE_RATE = 10; // Target rate of frequency data (samples / sec). We don't need this super often.
 
-export interface IFrequencyPackage {
-    dbBars: Float32Array;
-    dbMin: number;
-    dbMax: number;
+export interface IRecordingUpdate {
+    waveform: number[]; // floating points between 0 (low) and 1 (high).
 
     // TODO: @@ TravisR: Generalize this for a timing package?
 }
@@ -38,11 +36,11 @@ export class VoiceRecorder {
     private recorderContext: AudioContext;
     private recorderSource: MediaStreamAudioSourceNode;
     private recorderStream: MediaStream;
-    private recorderFreqNode: AnalyserNode;
+    private recorderFFT: AnalyserNode;
     private buffer = new Uint8Array(0);
     private mxc: string;
     private recording = false;
-    private observable: SimpleObservable<IFrequencyPackage>;
+    private observable: SimpleObservable<IRecordingUpdate>;
     private freqTimerId: number;
 
     public constructor(private client: MatrixClient) {
@@ -64,8 +62,16 @@ export class VoiceRecorder {
             sampleRate: SAMPLE_RATE, // once again, the browser will resample for us
         });
         this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream);
-        this.recorderFreqNode = this.recorderContext.createAnalyser();
-        this.recorderSource.connect(this.recorderFreqNode);
+        this.recorderFFT = this.recorderContext.createAnalyser();
+
+        // Bring the FFT time domain down a bit. The default is 2048, and this must be a power
+        // of two. We use 64 points because we happen to know down the line we need less than
+        // that, but 32 would be too few. Large numbers are not helpful here and do not add
+        // precision: they introduce higher precision outputs of the FFT (frequency data), but
+        // it makes the time domain less than helpful.
+        this.recorderFFT.fftSize = 64;
+
+        this.recorderSource.connect(this.recorderFFT);
         this.recorder = new Recorder({
             encoderPath, // magic from webpack
             encoderSampleRate: SAMPLE_RATE,
@@ -91,7 +97,7 @@
         };
     }
 
-    public get frequencyData(): SimpleObservable<IFrequencyPackage> {
+    public get liveData(): SimpleObservable<IRecordingUpdate> {
         if (!this.recording) throw new Error("No observable when not recording");
         return this.observable;
     }
@@ -121,16 +127,35 @@
         if (this.observable) {
             this.observable.close();
         }
-        this.observable = new SimpleObservable<IFrequencyPackage>();
+        this.observable = new SimpleObservable<IRecordingUpdate>();
         await this.makeRecorder();
         this.freqTimerId = setInterval(() => {
             if (!this.recording) return;
-            const data = new Float32Array(this.recorderFreqNode.frequencyBinCount);
-            this.recorderFreqNode.getFloatFrequencyData(data);
+
+            // The time domain is the input to the FFT, which means we use an array of the same
+            // size. The time domain is also known as the audio waveform. We're ignoring the
+            // output of the FFT here (frequency data) because we're not interested in it.
+            //
+            // We use bytes out of the analyser because floats have weird precision problems
+            // and are slightly more difficult to work with. The bytes are easy to work with,
+            // which is why we pick them (they're also more precise, but we care less about that).
+            const data = new Uint8Array(this.recorderFFT.fftSize);
+            this.recorderFFT.getByteTimeDomainData(data);
+
+            // Because we're dealing with a uint array we need to do math a bit differently.
+            // If we just `Array.from()` the uint array, we end up with 1s and 0s, which aren't
+            // what we're after. Instead, we have to use a bit of manual looping to correctly end
+            // up with the right values
+            const translatedData: number[] = [];
+            for (let i = 0; i < data.length; i++) {
+                // All we're doing here is inverting the amplitude and putting the metric somewhere
+                // between zero and one. Without the inversion, lower values are "louder", which is
+                // not super helpful.
+                translatedData.push(1 - (data[i] / 128.0));
+            }
+
             this.observable.update({
-                dbBars: data,
-                dbMin: this.recorderFreqNode.minDecibels,
-                dbMax: this.recorderFreqNode.maxDecibels,
+                waveform: translatedData,
             });
         }, 1000 / FREQ_SAMPLE_RATE) as any as number; // XXX: Linter doesn't understand timer environment
         await this.recorder.start();
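
For reference, the new polling path in VoiceRecorder boils down to reading the analyser's time-domain bytes and normalising them. The sketch below is not part of the diff; it is a minimal, self-contained illustration of the same Web Audio calls (AnalyserNode with fftSize = 64 and getByteTimeDomainData) and the same "1 - (byte / 128)" arithmetic used in start() above. The helper name readWaveformFrame is made up for the example.

// Illustrative only: mirrors the per-tick sampling in VoiceRecorder.start().
// `readWaveformFrame` is a hypothetical helper, not something added by this diff.
function readWaveformFrame(fft: AnalyserNode): number[] {
    // Read one frame of the raw waveform (time domain); the FFT's frequency output is unused.
    const data = new Uint8Array(fft.fftSize); // fftSize is 64, as set in makeRecorder()
    fft.getByteTimeDomainData(data); // bytes in the range 0..255, where 128 is silence

    const amplitudes: number[] = [];
    for (let i = 0; i < data.length; i++) {
        // Same arithmetic as the diff: a byte of 128 (silence) maps to 0, a byte of 0 maps to 1.
        amplitudes.push(1 - (data[i] / 128.0));
    }
    return amplitudes;
}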
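
On the rendering side, LiveRecordingWaveform resamples each update down to 35 bars and maps roughly the 0 to 0.4 portion of the amplitude range onto 0-100% bar height. The snippet below sketches that mapping with simplified stand-ins; the real arrayFastResample and percentageOf live in src/utils and may differ in detail.

// Simplified stand-ins for the utils referenced above; illustrative only.
function roughPercentageOf(val: number, min: number, max: number): number {
    return (val - min) / (max - min);
}

function roughResample(input: number[], points: number): number[] {
    // Naive nearest-sample resample; enough to show the shape of the data.
    const step = input.length / points;
    return Array.from({length: points}, (_, i) => input[Math.floor(i * step)]);
}

// 64 raw amplitudes in, 35 bar heights (CSS percentages) out, capped around 0.40
// so that normal speech already reads as "peaking" in the UI.
function toBarHeights(waveform: number[]): number[] {
    const bars = roughResample(waveform, 35); // DOWNSAMPLE_TARGET
    return bars.map(b => roughPercentageOf(b, 0, 0.40) * 100);
}

With this mapping, an amplitude of 0.2 renders as a 50% tall bar, and 0.4 or louder maps to 100% or above.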