Merge pull request #5989 from matrix-org/travis/voicemessages/safari
Support voice messages on Safaripull/21833/head
commit
d1cfde6f12
|
@ -0,0 +1,2 @@
|
|||
// Yes, this is empty.
|
||||
module.exports = {};
|
|
@ -186,7 +186,10 @@
|
|||
],
|
||||
"moduleNameMapper": {
|
||||
"\\.(gif|png|svg|ttf|woff2)$": "<rootDir>/__mocks__/imageMock.js",
|
||||
"\\$webapp/i18n/languages.json": "<rootDir>/__mocks__/languages.json"
|
||||
"\\$webapp/i18n/languages.json": "<rootDir>/__mocks__/languages.json",
|
||||
"decoderWorker\\.min\\.js": "<rootDir>/__mocks__/empty.js",
|
||||
"decoderWorker\\.min\\.wasm": "<rootDir>/__mocks__/empty.js",
|
||||
"waveWorker\\.min\\.js": "<rootDir>/__mocks__/empty.js"
|
||||
},
|
||||
"transformIgnorePatterns": [
|
||||
"/node_modules/(?!matrix-js-sdk).+$"
|
||||
|
|
|
@ -52,6 +52,9 @@ declare global {
|
|||
init: () => Promise<void>;
|
||||
};
|
||||
|
||||
// Needed for Safari, unknown to TypeScript
|
||||
webkitAudioContext: typeof AudioContext;
|
||||
|
||||
mxContentMessages: ContentMessages;
|
||||
mxToastStore: ToastStore;
|
||||
mxDeviceListener: DeviceListener;
|
||||
|
|
|
@ -73,7 +73,9 @@ class ConsoleLogger {
|
|||
|
||||
// Convert objects and errors to helpful things
|
||||
args = args.map((arg) => {
|
||||
if (arg instanceof Error) {
|
||||
if (arg instanceof DOMException) {
|
||||
return arg.message + ` (${arg.name} | ${arg.code}) ` + (arg.stack ? `\n${arg.stack}` : '');
|
||||
} else if (arg instanceof Error) {
|
||||
return arg.message + (arg.stack ? `\n${arg.stack}` : '');
|
||||
} else if (typeof (arg) === 'object') {
|
||||
try {
|
||||
|
|
|
@ -21,6 +21,7 @@ import {SimpleObservable} from "matrix-widget-api";
|
|||
import {IDestroyable} from "../utils/IDestroyable";
|
||||
import {PlaybackClock} from "./PlaybackClock";
|
||||
import {clamp} from "../utils/numbers";
|
||||
import {createAudioContext, decodeOgg} from "./compat";
|
||||
|
||||
export enum PlaybackState {
|
||||
Decoding = "decoding",
|
||||
|
@ -49,7 +50,7 @@ export class Playback extends EventEmitter implements IDestroyable {
|
|||
*/
|
||||
constructor(private buf: ArrayBuffer, seedWaveform = DEFAULT_WAVEFORM) {
|
||||
super();
|
||||
this.context = new AudioContext();
|
||||
this.context = createAudioContext();
|
||||
this.resampledWaveform = arrayFastResample(seedWaveform ?? DEFAULT_WAVEFORM, PLAYBACK_WAVEFORM_SAMPLES);
|
||||
this.waveformObservable.update(this.resampledWaveform);
|
||||
this.clock = new PlaybackClock(this.context);
|
||||
|
@ -91,7 +92,23 @@ export class Playback extends EventEmitter implements IDestroyable {
|
|||
}
|
||||
|
||||
public async prepare() {
|
||||
this.audioBuf = await this.context.decodeAudioData(this.buf);
|
||||
// Safari compat: promise API not supported on this function
|
||||
this.audioBuf = await new Promise((resolve, reject) => {
|
||||
this.context.decodeAudioData(this.buf, b => resolve(b), async e => {
|
||||
// This error handler is largely for Safari as well, which doesn't support Opus/Ogg
|
||||
// very well.
|
||||
console.error("Error decoding recording: ", e);
|
||||
console.warn("Trying to re-encode to WAV instead...");
|
||||
|
||||
const wav = await decodeOgg(this.buf);
|
||||
|
||||
// noinspection ES6MissingAwait - not needed when using callbacks
|
||||
this.context.decodeAudioData(wav, b => resolve(b), e => {
|
||||
console.error("Still failed to decode recording: ", e);
|
||||
reject(e);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// Update the waveform to the real waveform once we have channel data to use. We don't
|
||||
// exactly trust the user-provided waveform to be accurate...
|
||||
|
|
|
@ -19,16 +19,17 @@ import encoderPath from 'opus-recorder/dist/encoderWorker.min.js';
|
|||
import {MatrixClient} from "matrix-js-sdk/src/client";
|
||||
import CallMediaHandler from "../CallMediaHandler";
|
||||
import {SimpleObservable} from "matrix-widget-api";
|
||||
import {clamp} from "../utils/numbers";
|
||||
import {clamp, percentageOf, percentageWithin} from "../utils/numbers";
|
||||
import EventEmitter from "events";
|
||||
import {IDestroyable} from "../utils/IDestroyable";
|
||||
import {Singleflight} from "../utils/Singleflight";
|
||||
import {PayloadEvent, WORKLET_NAME} from "./consts";
|
||||
import {UPDATE_EVENT} from "../stores/AsyncStore";
|
||||
import {Playback} from "./Playback";
|
||||
import {createAudioContext} from "./compat";
|
||||
|
||||
const CHANNELS = 1; // stereo isn't important
|
||||
const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
|
||||
export const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
|
||||
const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus.
|
||||
const TARGET_MAX_LENGTH = 120; // 2 minutes in seconds. Somewhat arbitrary, though longer == larger files.
|
||||
const TARGET_WARN_TIME_LEFT = 10; // 10 seconds, also somewhat arbitrary.
|
||||
|
@ -55,6 +56,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
|||
private recorderStream: MediaStream;
|
||||
private recorderFFT: AnalyserNode;
|
||||
private recorderWorklet: AudioWorkletNode;
|
||||
private recorderProcessor: ScriptProcessorNode;
|
||||
private buffer = new Uint8Array(0); // use this.audioBuffer to access
|
||||
private mxc: string;
|
||||
private recording = false;
|
||||
|
@ -90,78 +92,107 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
|||
}
|
||||
|
||||
private async makeRecorder() {
|
||||
this.recorderStream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: {
|
||||
channelCount: CHANNELS,
|
||||
noiseSuppression: true, // browsers ignore constraints they can't honour
|
||||
deviceId: CallMediaHandler.getAudioInput(),
|
||||
},
|
||||
});
|
||||
this.recorderContext = new AudioContext({
|
||||
// latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing)
|
||||
});
|
||||
this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream);
|
||||
this.recorderFFT = this.recorderContext.createAnalyser();
|
||||
try {
|
||||
this.recorderStream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: {
|
||||
channelCount: CHANNELS,
|
||||
noiseSuppression: true, // browsers ignore constraints they can't honour
|
||||
deviceId: CallMediaHandler.getAudioInput(),
|
||||
},
|
||||
});
|
||||
this.recorderContext = createAudioContext({
|
||||
// latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing)
|
||||
});
|
||||
this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream);
|
||||
this.recorderFFT = this.recorderContext.createAnalyser();
|
||||
|
||||
// Bring the FFT time domain down a bit. The default is 2048, and this must be a power
|
||||
// of two. We use 64 points because we happen to know down the line we need less than
|
||||
// that, but 32 would be too few. Large numbers are not helpful here and do not add
|
||||
// precision: they introduce higher precision outputs of the FFT (frequency data), but
|
||||
// it makes the time domain less than helpful.
|
||||
this.recorderFFT.fftSize = 64;
|
||||
// Bring the FFT time domain down a bit. The default is 2048, and this must be a power
|
||||
// of two. We use 64 points because we happen to know down the line we need less than
|
||||
// that, but 32 would be too few. Large numbers are not helpful here and do not add
|
||||
// precision: they introduce higher precision outputs of the FFT (frequency data), but
|
||||
// it makes the time domain less than helpful.
|
||||
this.recorderFFT.fftSize = 64;
|
||||
|
||||
// Set up our worklet. We use this for timing information and waveform analysis: the
|
||||
// web audio API prefers this be done async to avoid holding the main thread with math.
|
||||
const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript;
|
||||
if (!mxRecorderWorkletPath) {
|
||||
throw new Error("Unable to create recorder: no worklet script registered");
|
||||
}
|
||||
await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath);
|
||||
this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME);
|
||||
|
||||
// Connect our inputs and outputs
|
||||
this.recorderSource.connect(this.recorderFFT);
|
||||
this.recorderSource.connect(this.recorderWorklet);
|
||||
this.recorderWorklet.connect(this.recorderContext.destination);
|
||||
|
||||
// Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
|
||||
this.recorderWorklet.port.onmessage = (ev) => {
|
||||
switch (ev.data['ev']) {
|
||||
case PayloadEvent.Timekeep:
|
||||
this.processAudioUpdate(ev.data['timeSeconds']);
|
||||
break;
|
||||
case PayloadEvent.AmplitudeMark:
|
||||
// Sanity check to make sure we're adding about one sample per second
|
||||
if (ev.data['forSecond'] === this.amplitudes.length) {
|
||||
this.amplitudes.push(ev.data['amplitude']);
|
||||
}
|
||||
break;
|
||||
// Set up our worklet. We use this for timing information and waveform analysis: the
|
||||
// web audio API prefers this be done async to avoid holding the main thread with math.
|
||||
const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript;
|
||||
if (!mxRecorderWorkletPath) {
|
||||
// noinspection ExceptionCaughtLocallyJS
|
||||
throw new Error("Unable to create recorder: no worklet script registered");
|
||||
}
|
||||
};
|
||||
|
||||
this.recorder = new Recorder({
|
||||
encoderPath, // magic from webpack
|
||||
encoderSampleRate: SAMPLE_RATE,
|
||||
encoderApplication: 2048, // voice (default is "audio")
|
||||
streamPages: true, // this speeds up the encoding process by using CPU over time
|
||||
encoderFrameSize: 20, // ms, arbitrary frame size we send to the encoder
|
||||
numberOfChannels: CHANNELS,
|
||||
sourceNode: this.recorderSource,
|
||||
encoderBitRate: BITRATE,
|
||||
// Connect our inputs and outputs
|
||||
this.recorderSource.connect(this.recorderFFT);
|
||||
|
||||
// We use low values for the following to ease CPU usage - the resulting waveform
|
||||
// is indistinguishable for a voice message. Note that the underlying library will
|
||||
// pick defaults which prefer the highest possible quality, CPU be damned.
|
||||
encoderComplexity: 3, // 0-10, 10 is slow and high quality.
|
||||
resampleQuality: 3, // 0-10, 10 is slow and high quality
|
||||
});
|
||||
this.recorder.ondataavailable = (a: ArrayBuffer) => {
|
||||
const buf = new Uint8Array(a);
|
||||
const newBuf = new Uint8Array(this.buffer.length + buf.length);
|
||||
newBuf.set(this.buffer, 0);
|
||||
newBuf.set(buf, this.buffer.length);
|
||||
this.buffer = newBuf;
|
||||
};
|
||||
if (this.recorderContext.audioWorklet) {
|
||||
await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath);
|
||||
this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME);
|
||||
this.recorderSource.connect(this.recorderWorklet);
|
||||
this.recorderWorklet.connect(this.recorderContext.destination);
|
||||
|
||||
// Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
|
||||
this.recorderWorklet.port.onmessage = (ev) => {
|
||||
switch (ev.data['ev']) {
|
||||
case PayloadEvent.Timekeep:
|
||||
this.processAudioUpdate(ev.data['timeSeconds']);
|
||||
break;
|
||||
case PayloadEvent.AmplitudeMark:
|
||||
// Sanity check to make sure we're adding about one sample per second
|
||||
if (ev.data['forSecond'] === this.amplitudes.length) {
|
||||
this.amplitudes.push(ev.data['amplitude']);
|
||||
}
|
||||
break;
|
||||
}
|
||||
};
|
||||
} else {
|
||||
// Safari fallback: use a processor node instead, buffered to 1024 bytes of data
|
||||
// like the worklet is.
|
||||
this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS);
|
||||
this.recorderSource.connect(this.recorderProcessor);
|
||||
this.recorderProcessor.connect(this.recorderContext.destination);
|
||||
this.recorderProcessor.addEventListener("audioprocess", this.onAudioProcess);
|
||||
}
|
||||
|
||||
this.recorder = new Recorder({
|
||||
encoderPath, // magic from webpack
|
||||
encoderSampleRate: SAMPLE_RATE,
|
||||
encoderApplication: 2048, // voice (default is "audio")
|
||||
streamPages: true, // this speeds up the encoding process by using CPU over time
|
||||
encoderFrameSize: 20, // ms, arbitrary frame size we send to the encoder
|
||||
numberOfChannels: CHANNELS,
|
||||
sourceNode: this.recorderSource,
|
||||
encoderBitRate: BITRATE,
|
||||
|
||||
// We use low values for the following to ease CPU usage - the resulting waveform
|
||||
// is indistinguishable for a voice message. Note that the underlying library will
|
||||
// pick defaults which prefer the highest possible quality, CPU be damned.
|
||||
encoderComplexity: 3, // 0-10, 10 is slow and high quality.
|
||||
resampleQuality: 3, // 0-10, 10 is slow and high quality
|
||||
});
|
||||
this.recorder.ondataavailable = (a: ArrayBuffer) => {
|
||||
const buf = new Uint8Array(a);
|
||||
const newBuf = new Uint8Array(this.buffer.length + buf.length);
|
||||
newBuf.set(this.buffer, 0);
|
||||
newBuf.set(buf, this.buffer.length);
|
||||
this.buffer = newBuf;
|
||||
};
|
||||
} catch (e) {
|
||||
console.error("Error starting recording: ", e);
|
||||
if (e instanceof DOMException) { // Unhelpful DOMExceptions are common - parse them sanely
|
||||
console.error(`${e.name} (${e.code}): ${e.message}`);
|
||||
}
|
||||
|
||||
// Clean up as best as possible
|
||||
if (this.recorderStream) this.recorderStream.getTracks().forEach(t => t.stop());
|
||||
if (this.recorderSource) this.recorderSource.disconnect();
|
||||
if (this.recorder) this.recorder.close();
|
||||
if (this.recorderContext) {
|
||||
// noinspection ES6MissingAwait - not important that we wait
|
||||
this.recorderContext.close();
|
||||
}
|
||||
|
||||
throw e; // rethrow so upstream can handle it
|
||||
}
|
||||
}
|
||||
|
||||
private get audioBuffer(): Uint8Array {
|
||||
|
@ -190,6 +221,13 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
|||
return this.mxc;
|
||||
}
|
||||
|
||||
private onAudioProcess = (ev: AudioProcessingEvent) => {
|
||||
this.processAudioUpdate(ev.playbackTime);
|
||||
|
||||
// We skip the functionality of the worklet regarding waveform calculations: we
|
||||
// should get that information pretty quick during the playback info.
|
||||
};
|
||||
|
||||
private processAudioUpdate = (timeSeconds: number) => {
|
||||
if (!this.recording) return;
|
||||
|
||||
|
@ -197,7 +235,16 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
|||
// size. The time domain is also known as the audio waveform. We're ignoring the
|
||||
// output of the FFT here (frequency data) because we're not interested in it.
|
||||
const data = new Float32Array(this.recorderFFT.fftSize);
|
||||
this.recorderFFT.getFloatTimeDomainData(data);
|
||||
if (!this.recorderFFT.getFloatTimeDomainData) {
|
||||
// Safari compat
|
||||
const data2 = new Uint8Array(this.recorderFFT.fftSize);
|
||||
this.recorderFFT.getByteTimeDomainData(data2);
|
||||
for (let i = 0; i < data2.length; i++) {
|
||||
data[i] = percentageWithin(percentageOf(data2[i], 0, 256), -1, 1);
|
||||
}
|
||||
} else {
|
||||
this.recorderFFT.getFloatTimeDomainData(data);
|
||||
}
|
||||
|
||||
// We can't just `Array.from()` the array because we're dealing with 32bit floats
|
||||
// and the built-in function won't consider that when converting between numbers.
|
||||
|
@ -268,7 +315,11 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
|||
// Disconnect the source early to start shutting down resources
|
||||
await this.recorder.stop(); // stop first to flush the last frame
|
||||
this.recorderSource.disconnect();
|
||||
this.recorderWorklet.disconnect();
|
||||
if (this.recorderWorklet) this.recorderWorklet.disconnect();
|
||||
if (this.recorderProcessor) {
|
||||
this.recorderProcessor.disconnect();
|
||||
this.recorderProcessor.removeEventListener("audioprocess", this.onAudioProcess);
|
||||
}
|
||||
|
||||
// close the context after the recorder so the recorder doesn't try to
|
||||
// connect anything to the context (this would generate a warning)
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
Copyright 2021 The Matrix.org Foundation C.I.C.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
import {SAMPLE_RATE} from "./VoiceRecording";
|
||||
|
||||
// @ts-ignore - we know that this is not a module. We're looking for a path.
|
||||
import decoderWasmPath from 'opus-recorder/dist/decoderWorker.min.wasm';
|
||||
import wavEncoderPath from 'opus-recorder/dist/waveWorker.min.js';
|
||||
import decoderPath from 'opus-recorder/dist/decoderWorker.min.js';
|
||||
|
||||
export function createAudioContext(opts?: AudioContextOptions): AudioContext {
|
||||
if (window.AudioContext) {
|
||||
return new AudioContext(opts);
|
||||
} else if (window.webkitAudioContext) {
|
||||
// While the linter is correct that "a constructor name should not start with
|
||||
// a lowercase letter", it's also wrong to think that we have control over this.
|
||||
// eslint-disable-next-line new-cap
|
||||
return new window.webkitAudioContext(opts);
|
||||
} else {
|
||||
throw new Error("Unsupported browser");
|
||||
}
|
||||
}
|
||||
|
||||
export function decodeOgg(audioBuffer: ArrayBuffer): Promise<ArrayBuffer> {
|
||||
// Condensed version of decoder example, using a promise:
|
||||
// https://github.com/chris-rudmin/opus-recorder/blob/master/example/decoder.html
|
||||
return new Promise((resolve) => { // no reject because the workers don't seem to have a fail path
|
||||
console.log("Decoder WASM path: " + decoderWasmPath); // so we use the variable (avoid tree shake)
|
||||
const typedArray = new Uint8Array(audioBuffer);
|
||||
const decoderWorker = new Worker(decoderPath);
|
||||
const wavWorker = new Worker(wavEncoderPath);
|
||||
|
||||
decoderWorker.postMessage({
|
||||
command: 'init',
|
||||
decoderSampleRate: SAMPLE_RATE,
|
||||
outputBufferSampleRate: SAMPLE_RATE,
|
||||
});
|
||||
|
||||
wavWorker.postMessage({
|
||||
command: 'init',
|
||||
wavBitDepth: 24, // standard for 48khz (SAMPLE_RATE)
|
||||
wavSampleRate: SAMPLE_RATE,
|
||||
});
|
||||
|
||||
decoderWorker.onmessage = (ev) => {
|
||||
if (ev.data === null) { // null == done
|
||||
wavWorker.postMessage({command: 'done'});
|
||||
return;
|
||||
}
|
||||
|
||||
wavWorker.postMessage({
|
||||
command: 'encode',
|
||||
buffers: ev.data,
|
||||
}, ev.data.map(b => b.buffer));
|
||||
};
|
||||
|
||||
wavWorker.onmessage = (ev) => {
|
||||
if (ev.data.message === 'page') {
|
||||
// The encoding comes through as a single page
|
||||
resolve(new Blob([ev.data.page], {type: "audio/wav"}).arrayBuffer());
|
||||
}
|
||||
};
|
||||
|
||||
decoderWorker.postMessage({
|
||||
command: 'decode',
|
||||
pages: typedArray,
|
||||
}, [typedArray.buffer]);
|
||||
});
|
||||
}
|
Loading…
Reference in New Issue