mirror of https://github.com/vector-im/riot-web
				
				
				
			Merge pull request #5989 from matrix-org/travis/voicemessages/safari
Support voice messages on Safari (pull/21833/head)
						commit
						d1cfde6f12
					
				|  | @ -0,0 +1,2 @@ | |||
| // Yes, this is empty.
 | ||||
| module.exports = {}; | ||||
|  | @ -186,7 +186,10 @@ | |||
|     ], | ||||
|     "moduleNameMapper": { | ||||
|       "\\.(gif|png|svg|ttf|woff2)$": "<rootDir>/__mocks__/imageMock.js", | ||||
|       "\\$webapp/i18n/languages.json": "<rootDir>/__mocks__/languages.json" | ||||
|       "\\$webapp/i18n/languages.json": "<rootDir>/__mocks__/languages.json", | ||||
|       "decoderWorker\\.min\\.js": "<rootDir>/__mocks__/empty.js", | ||||
|       "decoderWorker\\.min\\.wasm": "<rootDir>/__mocks__/empty.js", | ||||
|       "waveWorker\\.min\\.js": "<rootDir>/__mocks__/empty.js" | ||||
|     }, | ||||
|     "transformIgnorePatterns": [ | ||||
|       "/node_modules/(?!matrix-js-sdk).+$" | ||||
|  |  | |||
|  | @ -52,6 +52,9 @@ declare global { | |||
|             init: () => Promise<void>; | ||||
|         }; | ||||
| 
 | ||||
|         // Needed for Safari, unknown to TypeScript
 | ||||
|         webkitAudioContext: typeof AudioContext; | ||||
| 
 | ||||
|         mxContentMessages: ContentMessages; | ||||
|         mxToastStore: ToastStore; | ||||
|         mxDeviceListener: DeviceListener; | ||||
|  |  | |||
|  | @ -73,7 +73,9 @@ class ConsoleLogger { | |||
| 
 | ||||
|         // Convert objects and errors to helpful things
 | ||||
|         args = args.map((arg) => { | ||||
|             if (arg instanceof Error) { | ||||
|             if (arg instanceof DOMException) { | ||||
|                 return arg.message + ` (${arg.name} | ${arg.code}) ` + (arg.stack ? `\n${arg.stack}` : ''); | ||||
|             } else if (arg instanceof Error) { | ||||
|                 return arg.message + (arg.stack ? `\n${arg.stack}` : ''); | ||||
|             } else if (typeof (arg) === 'object') { | ||||
|                 try { | ||||
|  |  | |||
|  | @ -21,6 +21,7 @@ import {SimpleObservable} from "matrix-widget-api"; | |||
| import {IDestroyable} from "../utils/IDestroyable"; | ||||
| import {PlaybackClock} from "./PlaybackClock"; | ||||
| import {clamp} from "../utils/numbers"; | ||||
| import {createAudioContext, decodeOgg} from "./compat"; | ||||
| 
 | ||||
| export enum PlaybackState { | ||||
|     Decoding = "decoding", | ||||
|  | @ -49,7 +50,7 @@ export class Playback extends EventEmitter implements IDestroyable { | |||
|      */ | ||||
|     constructor(private buf: ArrayBuffer, seedWaveform = DEFAULT_WAVEFORM) { | ||||
|         super(); | ||||
|         this.context = new AudioContext(); | ||||
|         this.context = createAudioContext(); | ||||
|         this.resampledWaveform = arrayFastResample(seedWaveform ?? DEFAULT_WAVEFORM, PLAYBACK_WAVEFORM_SAMPLES); | ||||
|         this.waveformObservable.update(this.resampledWaveform); | ||||
|         this.clock = new PlaybackClock(this.context); | ||||
|  | @ -91,7 +92,23 @@ export class Playback extends EventEmitter implements IDestroyable { | |||
|     } | ||||
| 
 | ||||
|     public async prepare() { | ||||
|         this.audioBuf = await this.context.decodeAudioData(this.buf); | ||||
|         // Safari compat: promise API not supported on this function
 | ||||
|         this.audioBuf = await new Promise((resolve, reject) => { | ||||
|             this.context.decodeAudioData(this.buf, b => resolve(b), async e => { | ||||
|                 // This error handler is largely for Safari as well, which doesn't support Opus/Ogg
 | ||||
|                 // very well.
 | ||||
|                 console.error("Error decoding recording: ", e); | ||||
|                 console.warn("Trying to re-encode to WAV instead..."); | ||||
| 
 | ||||
|                 const wav = await decodeOgg(this.buf); | ||||
| 
 | ||||
|                 // noinspection ES6MissingAwait - not needed when using callbacks
 | ||||
|                 this.context.decodeAudioData(wav, b => resolve(b), e => { | ||||
|                     console.error("Still failed to decode recording: ", e); | ||||
|                     reject(e); | ||||
|                 }); | ||||
|             }); | ||||
|         }); | ||||
| 
 | ||||
|         // Update the waveform to the real waveform once we have channel data to use. We don't
 | ||||
|         // exactly trust the user-provided waveform to be accurate...
 | ||||
|  |  | |||
|  | @ -19,16 +19,17 @@ import encoderPath from 'opus-recorder/dist/encoderWorker.min.js'; | |||
| import {MatrixClient} from "matrix-js-sdk/src/client"; | ||||
| import CallMediaHandler from "../CallMediaHandler"; | ||||
| import {SimpleObservable} from "matrix-widget-api"; | ||||
| import {clamp} from "../utils/numbers"; | ||||
| import {clamp, percentageOf, percentageWithin} from "../utils/numbers"; | ||||
| import EventEmitter from "events"; | ||||
| import {IDestroyable} from "../utils/IDestroyable"; | ||||
| import {Singleflight} from "../utils/Singleflight"; | ||||
| import {PayloadEvent, WORKLET_NAME} from "./consts"; | ||||
| import {UPDATE_EVENT} from "../stores/AsyncStore"; | ||||
| import {Playback} from "./Playback"; | ||||
| import {createAudioContext} from "./compat"; | ||||
| 
 | ||||
| const CHANNELS = 1; // stereo isn't important
 | ||||
| const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
 | ||||
| export const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
 | ||||
| const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus.
 | ||||
| const TARGET_MAX_LENGTH = 120; // 2 minutes in seconds. Somewhat arbitrary, though longer == larger files.
 | ||||
| const TARGET_WARN_TIME_LEFT = 10; // 10 seconds, also somewhat arbitrary.
 | ||||
|  | @ -55,6 +56,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { | |||
|     private recorderStream: MediaStream; | ||||
|     private recorderFFT: AnalyserNode; | ||||
|     private recorderWorklet: AudioWorkletNode; | ||||
|     private recorderProcessor: ScriptProcessorNode; | ||||
|     private buffer = new Uint8Array(0); // use this.audioBuffer to access
 | ||||
|     private mxc: string; | ||||
|     private recording = false; | ||||
|  | @ -90,78 +92,107 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { | |||
|     } | ||||
| 
 | ||||
|     private async makeRecorder() { | ||||
|         this.recorderStream = await navigator.mediaDevices.getUserMedia({ | ||||
|             audio: { | ||||
|                 channelCount: CHANNELS, | ||||
|                 noiseSuppression: true, // browsers ignore constraints they can't honour
 | ||||
|                 deviceId: CallMediaHandler.getAudioInput(), | ||||
|             }, | ||||
|         }); | ||||
|         this.recorderContext = new AudioContext({ | ||||
|             // latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing)
 | ||||
|         }); | ||||
|         this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream); | ||||
|         this.recorderFFT = this.recorderContext.createAnalyser(); | ||||
|         try { | ||||
|             this.recorderStream = await navigator.mediaDevices.getUserMedia({ | ||||
|                 audio: { | ||||
|                     channelCount: CHANNELS, | ||||
|                     noiseSuppression: true, // browsers ignore constraints they can't honour
 | ||||
|                     deviceId: CallMediaHandler.getAudioInput(), | ||||
|                 }, | ||||
|             }); | ||||
|             this.recorderContext = createAudioContext({ | ||||
|                 // latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing)
 | ||||
|             }); | ||||
|             this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream); | ||||
|             this.recorderFFT = this.recorderContext.createAnalyser(); | ||||
| 
 | ||||
|         // Bring the FFT time domain down a bit. The default is 2048, and this must be a power
 | ||||
|         // of two. We use 64 points because we happen to know down the line we need less than
 | ||||
|         // that, but 32 would be too few. Large numbers are not helpful here and do not add
 | ||||
|         // precision: they introduce higher precision outputs of the FFT (frequency data), but
 | ||||
|         // it makes the time domain less than helpful.
 | ||||
|         this.recorderFFT.fftSize = 64; | ||||
|             // Bring the FFT time domain down a bit. The default is 2048, and this must be a power
 | ||||
|             // of two. We use 64 points because we happen to know down the line we need less than
 | ||||
|             // that, but 32 would be too few. Large numbers are not helpful here and do not add
 | ||||
|             // precision: they introduce higher precision outputs of the FFT (frequency data), but
 | ||||
|             // it makes the time domain less than helpful.
 | ||||
|             this.recorderFFT.fftSize = 64; | ||||
| 
 | ||||
|         // Set up our worklet. We use this for timing information and waveform analysis: the
 | ||||
|         // web audio API prefers this be done async to avoid holding the main thread with math.
 | ||||
|         const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript; | ||||
|         if (!mxRecorderWorkletPath) { | ||||
|             throw new Error("Unable to create recorder: no worklet script registered"); | ||||
|         } | ||||
|         await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath); | ||||
|         this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME); | ||||
| 
 | ||||
|         // Connect our inputs and outputs
 | ||||
|         this.recorderSource.connect(this.recorderFFT); | ||||
|         this.recorderSource.connect(this.recorderWorklet); | ||||
|         this.recorderWorklet.connect(this.recorderContext.destination); | ||||
| 
 | ||||
|         // Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
 | ||||
|         this.recorderWorklet.port.onmessage = (ev) => { | ||||
|             switch (ev.data['ev']) { | ||||
|                 case PayloadEvent.Timekeep: | ||||
|                     this.processAudioUpdate(ev.data['timeSeconds']); | ||||
|                     break; | ||||
|                 case PayloadEvent.AmplitudeMark: | ||||
|                     // Sanity check to make sure we're adding about one sample per second
 | ||||
|                     if (ev.data['forSecond'] === this.amplitudes.length) { | ||||
|                         this.amplitudes.push(ev.data['amplitude']); | ||||
|                     } | ||||
|                     break; | ||||
|             // Set up our worklet. We use this for timing information and waveform analysis: the
 | ||||
|             // web audio API prefers this be done async to avoid holding the main thread with math.
 | ||||
|             const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript; | ||||
|             if (!mxRecorderWorkletPath) { | ||||
|                 // noinspection ExceptionCaughtLocallyJS
 | ||||
|                 throw new Error("Unable to create recorder: no worklet script registered"); | ||||
|             } | ||||
|         }; | ||||
| 
 | ||||
|         this.recorder = new Recorder({ | ||||
|             encoderPath, // magic from webpack
 | ||||
|             encoderSampleRate: SAMPLE_RATE, | ||||
|             encoderApplication: 2048, // voice (default is "audio")
 | ||||
|             streamPages: true, // this speeds up the encoding process by using CPU over time
 | ||||
|             encoderFrameSize: 20, // ms, arbitrary frame size we send to the encoder
 | ||||
|             numberOfChannels: CHANNELS, | ||||
|             sourceNode: this.recorderSource, | ||||
|             encoderBitRate: BITRATE, | ||||
|             // Connect our inputs and outputs
 | ||||
|             this.recorderSource.connect(this.recorderFFT); | ||||
| 
 | ||||
|             // We use low values for the following to ease CPU usage - the resulting waveform
 | ||||
|             // is indistinguishable for a voice message. Note that the underlying library will
 | ||||
|             // pick defaults which prefer the highest possible quality, CPU be damned.
 | ||||
|             encoderComplexity: 3, // 0-10, 10 is slow and high quality.
 | ||||
|             resampleQuality: 3, // 0-10, 10 is slow and high quality
 | ||||
|         }); | ||||
|         this.recorder.ondataavailable = (a: ArrayBuffer) => { | ||||
|             const buf = new Uint8Array(a); | ||||
|             const newBuf = new Uint8Array(this.buffer.length + buf.length); | ||||
|             newBuf.set(this.buffer, 0); | ||||
|             newBuf.set(buf, this.buffer.length); | ||||
|             this.buffer = newBuf; | ||||
|         }; | ||||
|             if (this.recorderContext.audioWorklet) { | ||||
|                 await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath); | ||||
|                 this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME); | ||||
|                 this.recorderSource.connect(this.recorderWorklet); | ||||
|                 this.recorderWorklet.connect(this.recorderContext.destination); | ||||
| 
 | ||||
|                 // Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
 | ||||
|                 this.recorderWorklet.port.onmessage = (ev) => { | ||||
|                     switch (ev.data['ev']) { | ||||
|                         case PayloadEvent.Timekeep: | ||||
|                             this.processAudioUpdate(ev.data['timeSeconds']); | ||||
|                             break; | ||||
|                         case PayloadEvent.AmplitudeMark: | ||||
|                             // Sanity check to make sure we're adding about one sample per second
 | ||||
|                             if (ev.data['forSecond'] === this.amplitudes.length) { | ||||
|                                 this.amplitudes.push(ev.data['amplitude']); | ||||
|                             } | ||||
|                             break; | ||||
|                     } | ||||
|                 }; | ||||
|             } else { | ||||
|                 // Safari fallback: use a processor node instead, buffered to 1024 bytes of data
 | ||||
|                 // like the worklet is.
 | ||||
|                 this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS); | ||||
|                 this.recorderSource.connect(this.recorderProcessor); | ||||
|                 this.recorderProcessor.connect(this.recorderContext.destination); | ||||
|                 this.recorderProcessor.addEventListener("audioprocess", this.onAudioProcess); | ||||
|             } | ||||
| 
 | ||||
|             this.recorder = new Recorder({ | ||||
|                 encoderPath, // magic from webpack
 | ||||
|                 encoderSampleRate: SAMPLE_RATE, | ||||
|                 encoderApplication: 2048, // voice (default is "audio")
 | ||||
|                 streamPages: true, // this speeds up the encoding process by using CPU over time
 | ||||
|                 encoderFrameSize: 20, // ms, arbitrary frame size we send to the encoder
 | ||||
|                 numberOfChannels: CHANNELS, | ||||
|                 sourceNode: this.recorderSource, | ||||
|                 encoderBitRate: BITRATE, | ||||
| 
 | ||||
|                 // We use low values for the following to ease CPU usage - the resulting waveform
 | ||||
|                 // is indistinguishable for a voice message. Note that the underlying library will
 | ||||
|                 // pick defaults which prefer the highest possible quality, CPU be damned.
 | ||||
|                 encoderComplexity: 3, // 0-10, 10 is slow and high quality.
 | ||||
|                 resampleQuality: 3, // 0-10, 10 is slow and high quality
 | ||||
|             }); | ||||
|             this.recorder.ondataavailable = (a: ArrayBuffer) => { | ||||
|                 const buf = new Uint8Array(a); | ||||
|                 const newBuf = new Uint8Array(this.buffer.length + buf.length); | ||||
|                 newBuf.set(this.buffer, 0); | ||||
|                 newBuf.set(buf, this.buffer.length); | ||||
|                 this.buffer = newBuf; | ||||
|             }; | ||||
|         } catch (e) { | ||||
|             console.error("Error starting recording: ", e); | ||||
|             if (e instanceof DOMException) { // Unhelpful DOMExceptions are common - parse them sanely
 | ||||
|                 console.error(`${e.name} (${e.code}): ${e.message}`); | ||||
|             } | ||||
| 
 | ||||
|             // Clean up as best as possible
 | ||||
|             if (this.recorderStream) this.recorderStream.getTracks().forEach(t => t.stop()); | ||||
|             if (this.recorderSource) this.recorderSource.disconnect(); | ||||
|             if (this.recorder) this.recorder.close(); | ||||
|             if (this.recorderContext) { | ||||
|                 // noinspection ES6MissingAwait - not important that we wait
 | ||||
|                 this.recorderContext.close(); | ||||
|             } | ||||
| 
 | ||||
|             throw e; // rethrow so upstream can handle it
 | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     private get audioBuffer(): Uint8Array { | ||||
|  | @ -190,6 +221,13 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { | |||
|         return this.mxc; | ||||
|     } | ||||
| 
 | ||||
|     private onAudioProcess = (ev: AudioProcessingEvent) => { | ||||
|         this.processAudioUpdate(ev.playbackTime); | ||||
| 
 | ||||
|         // We skip the functionality of the worklet regarding waveform calculations: we
 | ||||
|         // should get that information pretty quick during the playback info.
 | ||||
|     }; | ||||
| 
 | ||||
|     private processAudioUpdate = (timeSeconds: number) => { | ||||
|         if (!this.recording) return; | ||||
| 
 | ||||
|  | @ -197,7 +235,16 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { | |||
|         // size. The time domain is also known as the audio waveform. We're ignoring the
 | ||||
|         // output of the FFT here (frequency data) because we're not interested in it.
 | ||||
|         const data = new Float32Array(this.recorderFFT.fftSize); | ||||
|         this.recorderFFT.getFloatTimeDomainData(data); | ||||
|         if (!this.recorderFFT.getFloatTimeDomainData) { | ||||
|             // Safari compat
 | ||||
|             const data2 = new Uint8Array(this.recorderFFT.fftSize); | ||||
|             this.recorderFFT.getByteTimeDomainData(data2); | ||||
|             for (let i = 0; i < data2.length; i++) { | ||||
|                 data[i] = percentageWithin(percentageOf(data2[i], 0, 256), -1, 1); | ||||
|             } | ||||
|         } else { | ||||
|             this.recorderFFT.getFloatTimeDomainData(data); | ||||
|         } | ||||
| 
 | ||||
|         // We can't just `Array.from()` the array because we're dealing with 32bit floats
 | ||||
|         // and the built-in function won't consider that when converting between numbers.
 | ||||
|  | @ -268,7 +315,11 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { | |||
|             // Disconnect the source early to start shutting down resources
 | ||||
|             await this.recorder.stop(); // stop first to flush the last frame
 | ||||
|             this.recorderSource.disconnect(); | ||||
|             this.recorderWorklet.disconnect(); | ||||
|             if (this.recorderWorklet) this.recorderWorklet.disconnect(); | ||||
|             if (this.recorderProcessor) { | ||||
|                 this.recorderProcessor.disconnect(); | ||||
|                 this.recorderProcessor.removeEventListener("audioprocess", this.onAudioProcess); | ||||
|             } | ||||
| 
 | ||||
|             // close the context after the recorder so the recorder doesn't try to
 | ||||
|             // connect anything to the context (this would generate a warning)
 | ||||
|  |  | |||
|  | @ -0,0 +1,82 @@ | |||
| /* | ||||
| Copyright 2021 The Matrix.org Foundation C.I.C. | ||||
| 
 | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
| 
 | ||||
|     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
| 
 | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
| 
 | ||||
| import {SAMPLE_RATE} from "./VoiceRecording"; | ||||
| 
 | ||||
| // @ts-ignore - we know that this is not a module. We're looking for a path.
 | ||||
| import decoderWasmPath from 'opus-recorder/dist/decoderWorker.min.wasm'; | ||||
| import wavEncoderPath from 'opus-recorder/dist/waveWorker.min.js'; | ||||
| import decoderPath from 'opus-recorder/dist/decoderWorker.min.js'; | ||||
| 
 | ||||
/**
 * Creates an audio context, falling back to the WebKit-prefixed constructor when the
 * standard `AudioContext` is not available on `window` (needed for Safari).
 *
 * @param opts Optional context options, passed unchanged to whichever constructor is used.
 * @returns The newly constructed audio context.
 * @throws Error ("Unsupported browser") when neither constructor exists on `window`.
 */
export function createAudioContext(opts?: AudioContextOptions): AudioContext {
    // Look the constructor up once. Binding it to a capitalised local also avoids having
    // to silence the linter's new-cap rule for the lowercase `webkitAudioContext` name,
    // which we have no control over.
    const ContextConstructor = window.AudioContext || window.webkitAudioContext;
    if (!ContextConstructor) {
        throw new Error("Unsupported browser");
    }
    return new ContextConstructor(opts);
}
| 
 | ||||
/**
 * Re-encodes an Ogg recording as WAV by piping it through two web workers: a decoder
 * worker, whose output buffers are forwarded to a WAV encoder worker.
 *
 * Condensed version of the opus-recorder decoder example, using a promise:
 * https://github.com/chris-rudmin/opus-recorder/blob/master/example/decoder.html
 *
 * Note: the bytes backing `audioBuffer` are *transferred* to the decoder worker, so the
 * caller's buffer is detached once this function has been called. Pass a copy if the
 * original bytes are still needed afterwards.
 *
 * @param audioBuffer The Ogg-encoded audio to convert.
 * @returns Resolves to the WAV-encoded audio once the encoder emits its page.
 * @throws Rejects with an Error if either worker reports an error event.
 */
export function decodeOgg(audioBuffer: ArrayBuffer): Promise<ArrayBuffer> {
    return new Promise((resolve, reject) => {
        console.log("Decoder WASM path: " + decoderWasmPath); // so we use the variable (avoid tree shake)
        const typedArray = new Uint8Array(audioBuffer);
        const decoderWorker = new Worker(decoderPath);
        const wavWorker = new Worker(wavEncoderPath);

        // Nothing we send is guaranteed to make both workers shut themselves down, so
        // always terminate them explicitly once the conversion has settled either way.
        const releaseWorkers = () => {
            decoderWorker.terminate();
            wavWorker.terminate();
        };

        // Without an error path a failing worker would leave the promise pending forever.
        const failWith = (stage: string) => (ev: ErrorEvent) => {
            releaseWorkers();
            const detail = ev && ev.message ? ev.message : "unknown error";
            reject(new Error(`Failed to convert recording (${stage}): ${detail}`));
        };
        decoderWorker.onerror = failWith("decoder");
        wavWorker.onerror = failWith("wav encoder");

        decoderWorker.postMessage({
            command: 'init',
            decoderSampleRate: SAMPLE_RATE,
            outputBufferSampleRate: SAMPLE_RATE,
        });

        wavWorker.postMessage({
            command: 'init',
            wavBitDepth: 24, // standard for 48khz (SAMPLE_RATE)
            wavSampleRate: SAMPLE_RATE,
        });

        decoderWorker.onmessage = (ev) => {
            if (ev.data === null) { // null == done
                wavWorker.postMessage({command: 'done'});
                return;
            }

            // Hand each decoded buffer over to the encoder, transferring ownership
            // rather than copying.
            wavWorker.postMessage({
                command: 'encode',
                buffers: ev.data,
            }, ev.data.map((b: ArrayBufferView) => b.buffer));
        };

        wavWorker.onmessage = (ev) => {
            if (ev.data.message === 'page') {
                // The encoding comes through as a single page, which we already hold a
                // reference to, so the workers are no longer needed.
                releaseWorkers();
                resolve(new Blob([ev.data.page], {type: "audio/wav"}).arrayBuffer());
            }
        };

        decoderWorker.postMessage({
            command: 'decode',
            pages: typedArray,
        }, [typedArray.buffer]);
    });
}
		Loading…
	
		Reference in New Issue
	
	 Travis Ralston
						Travis Ralston