From 07a5a1dc6fb02ff961891ddadadcd2725e8bdcf8 Mon Sep 17 00:00:00 2001
From: David Baker <dbkr@users.noreply.github.com>
Date: Fri, 30 Sep 2022 17:28:53 +0100
Subject: [PATCH] Fix device selection in pre-join screen for Element Call
 video rooms (#9321)

* Fix device selection in pre-join screen for Element Call video rooms

As per https://github.com/vector-im/element-call/pull/609

* Update unit test

* Lint

* Hold a media stream while we enumerate device so we can do so reliably.

This means we can remove the device fallback labels.

* i18n

* Remove unnecessary useState

* Fix fetching video devices when video muted

* Actually fix preview stream code

* Fix unit test now fallback is no longer a thing

* Test changing devices
---
 src/MediaDeviceHandler.ts                    | 16 +++-
 src/components/views/voip/CallView.tsx       | 77 ++++++++++---------
 src/i18n/strings/en_EN.json                  |  2 -
 src/models/Call.ts                           |  4 +-
 test/components/views/voip/CallView-test.tsx | 80 ++++++++++++++------
 test/models/Call-test.ts                     |  4 +-
 test/test-utils/test-utils.ts                |  6 ++
 7 files changed, 123 insertions(+), 66 deletions(-)
diff --git a/src/MediaDeviceHandler.ts b/src/MediaDeviceHandler.ts
index a5160484c8..0e6d2b98bc 100644
--- a/src/MediaDeviceHandler.ts
+++ b/src/MediaDeviceHandler.ts
@@ -50,10 +50,20 @@ export default class MediaDeviceHandler extends EventEmitter {
         return devices.some(d => Boolean(d.label));
     }
 
+    /**
+     * Gets the available audio input/output and video input devices
+     * from the browser: a thin wrapper around mediaDevices.enumerateDevices()
+     * that also returns results by type of devices. Note that this requires
+     * user media permissions and an active stream, otherwise you'll get blank
+     * device labels.
+     *
+     * Once the Permissions API
+     * (https://developer.mozilla.org/en-US/docs/Web/API/Permissions_API)
+     * is ready for primetime, it might help make this simpler.
+     *
+     * @return Promise<IMediaDevices> The available media devices
+     */
     public static async getDevices(): Promise<IMediaDevices> {
-        // Only needed for Electron atm, though should work in modern browsers
-        // once permission has been granted to the webapp
-
         try {
             const devices = await navigator.mediaDevices.enumerateDevices();
             const output = {
diff --git a/src/components/views/voip/CallView.tsx b/src/components/views/voip/CallView.tsx
index f85296f1d0..fbab581e0f 100644
--- a/src/components/views/voip/CallView.tsx
+++ b/src/components/views/voip/CallView.tsx
@@ -45,7 +45,6 @@ interface DeviceButtonProps {
     devices: MediaDeviceInfo[];
     setDevice: (device: MediaDeviceInfo) => void;
     deviceListLabel: string;
-    fallbackDeviceLabel: (n: number) => string;
     muted: boolean;
     disabled: boolean;
     toggle: () => void;
@@ -54,7 +53,7 @@ interface DeviceButtonProps {
 }
 
 const DeviceButton: FC<DeviceButtonProps> = ({
-    kind, devices, setDevice, deviceListLabel, fallbackDeviceLabel, muted, disabled, toggle, unmutedTitle, mutedTitle,
+    kind, devices, setDevice, deviceListLabel, muted, disabled, toggle, unmutedTitle, mutedTitle,
 }) => {
     const [showMenu, buttonRef, openMenu, closeMenu] = useContextMenu();
     const selectDevice = useCallback((device: MediaDeviceInfo) => {
@@ -67,10 +66,10 @@ const DeviceButton: FC<DeviceButtonProps> = ({
         const buttonRect = buttonRef.current!.getBoundingClientRect();
         contextMenu = <IconizedContextMenu {...aboveLeftOf(buttonRect)} onFinished={closeMenu}>
             <IconizedContextMenuOptionList>
-                { devices.map((d, index) =>
+                { devices.map((d) =>
                     <IconizedContextMenuOption
                         key={d.deviceId}
-                        label={d.label || fallbackDeviceLabel(index + 1)}
+                        label={d.label}
                         onClick={() => selectDevice(d)}
                     />,
                 ) }
@@ -119,26 +118,8 @@ export const Lobby: FC<LobbyProps> = ({ room, connect, children }) => {
     const me = useMemo(() => room.getMember(room.myUserId)!, [room]);
     const videoRef = useRef<HTMLVideoElement>(null);
 
-    const [audioInputs, videoInputs] = useAsyncMemo(async () => {
-        try {
-            const devices = await MediaDeviceHandler.getDevices();
-            return [devices[MediaDeviceKindEnum.AudioInput], devices[MediaDeviceKindEnum.VideoInput]];
-        } catch (e) {
-            logger.warn(`Failed to get media device list`, e);
-            return [[], []];
-        }
-    }, [], [[], []]);
-
     const [videoInputId, setVideoInputId] = useState<string>(() => MediaDeviceHandler.getVideoInput());
 
-    const setAudioInput = useCallback((device: MediaDeviceInfo) => {
-        MediaDeviceHandler.instance.setAudioInput(device.deviceId);
-    }, []);
-    const setVideoInput = useCallback((device: MediaDeviceInfo) => {
-        MediaDeviceHandler.instance.setVideoInput(device.deviceId);
-        setVideoInputId(device.deviceId);
-    }, []);
-
     const [audioMuted, setAudioMuted] = useState(() => MediaDeviceHandler.startWithAudioMuted);
     const [videoMuted, setVideoMuted] = useState(() => MediaDeviceHandler.startWithVideoMuted);
 
@@ -151,18 +132,46 @@ export const Lobby: FC<LobbyProps> = ({ room, connect, children }) => {
         setVideoMuted(!videoMuted);
     }, [videoMuted, setVideoMuted]);
 
-    const videoStream = useAsyncMemo(async () => {
-        if (videoInputId && !videoMuted) {
-            try {
-                return await navigator.mediaDevices.getUserMedia({
-                    video: { deviceId: videoInputId },
-                });
-            } catch (e) {
-                logger.error(`Failed to get stream for device ${videoInputId}`, e);
-            }
+    const [videoStream, audioInputs, videoInputs] = useAsyncMemo(async () => {
+        let previewStream: MediaStream;
+        try {
+            // We get the preview stream before requesting devices: this is because
+            // we need (in some browsers) an active media stream in order to get
+            // non-blank labels for the devices. According to the docs, we
+            // need a stream of each type (audio + video) if we want to enumerate
+            // audio & video devices, although this didn't seem to be the case
+            // in practice for me. We request both anyway.
+            // For similar reasons, we also request a stream even if video is muted,
+            // which could be a bit strange but allows us to get the device list
+            // reliably. One option could be to try & get devices without a stream,
+            // then try again with a stream if we get blank deviceids, but... ew.
+            previewStream = await navigator.mediaDevices.getUserMedia({
+                video: { deviceId: videoInputId },
+                audio: { deviceId: MediaDeviceHandler.getAudioInput() },
+            });
+        } catch (e) {
+            logger.error(`Failed to get stream for device ${videoInputId}`, e);
         }
-        return null;
-    }, [videoInputId, videoMuted]);
+
+        const devices = await MediaDeviceHandler.getDevices();
+
+        // If video is muted, we don't actually want the stream, so we can get rid of
+        // it now.
+        if (videoMuted) {
+            previewStream.getTracks().forEach(t => t.stop());
+            previewStream = undefined;
+        }
+
+        return [previewStream, devices[MediaDeviceKindEnum.AudioInput], devices[MediaDeviceKindEnum.VideoInput]];
+    }, [videoInputId, videoMuted], [null, [], []]);
+
+    const setAudioInput = useCallback((device: MediaDeviceInfo) => {
+        MediaDeviceHandler.instance.setAudioInput(device.deviceId);
+    }, []);
+    const setVideoInput = useCallback((device: MediaDeviceInfo) => {
+        MediaDeviceHandler.instance.setVideoInput(device.deviceId);
+        setVideoInputId(device.deviceId);
+    }, []);
 
     useEffect(() => {
         if (videoStream) {
@@ -205,7 +214,6 @@ export const Lobby: FC<LobbyProps> = ({ room, connect, children }) => {
                     devices={audioInputs}
                     setDevice={setAudioInput}
                     deviceListLabel={_t("Audio devices")}
-                    fallbackDeviceLabel={n => _t("Audio input %(n)s", { n })}
                     muted={audioMuted}
                     disabled={connecting}
                     toggle={toggleAudio}
@@ -217,7 +225,6 @@ export const Lobby: FC<LobbyProps> = ({ room, connect, children }) => {
                     devices={videoInputs}
                     setDevice={setVideoInput}
                     deviceListLabel={_t("Video devices")}
-                    fallbackDeviceLabel={n => _t("Video input %(n)s", { n })}
                     muted={videoMuted}
                     disabled={connecting}
                     toggle={toggleVideo}
diff --git a/src/i18n/strings/en_EN.json b/src/i18n/strings/en_EN.json
index cba3340ca8..1a051cf8e1 100644
--- a/src/i18n/strings/en_EN.json
+++ b/src/i18n/strings/en_EN.json
@@ -1047,11 +1047,9 @@
     "Hint: Begin your message with <code>//</code> to start it with a slash.": "Hint: Begin your message with <code>//</code> to start it with a slash.",
     "Send as message": "Send as message",
     "Audio devices": "Audio devices",
-    "Audio input %(n)s": "Audio input %(n)s",
     "Mute microphone": "Mute microphone",
     "Unmute microphone": "Unmute microphone",
     "Video devices": "Video devices",
-    "Video input %(n)s": "Video input %(n)s",
     "Turn off camera": "Turn off camera",
     "Turn on camera": "Turn on camera",
     "Join": "Join",
diff --git a/src/models/Call.ts b/src/models/Call.ts
index b51a49cc9e..417cf16291 100644
--- a/src/models/Call.ts
+++ b/src/models/Call.ts
@@ -771,8 +771,8 @@ export class ElementCall extends Call {
     ): Promise<void> {
         try {
             await this.messaging!.transport.send(ElementWidgetActions.JoinCall, {
-                audioInput: audioInput?.deviceId ?? null,
-                videoInput: videoInput?.deviceId ?? null,
+                audioInput: audioInput?.label ?? null,
+                videoInput: videoInput?.label ?? null,
             });
         } catch (e) {
             throw new Error(`Failed to join call in room ${this.roomId}: ${e}`);
diff --git a/test/components/views/voip/CallView-test.tsx b/test/components/views/voip/CallView-test.tsx
index 827b4d29c1..24d26e5d52 100644
--- a/test/components/views/voip/CallView-test.tsx
+++ b/test/components/views/voip/CallView-test.tsx
@@ -187,6 +187,35 @@ describe("CallLobby", () => {
     });
 
     describe("device buttons", () => {
+        const fakeVideoInput1: MediaDeviceInfo = {
+            deviceId: "v1",
+            groupId: "v1",
+            label: "Webcam",
+            kind: "videoinput",
+            toJSON: () => {},
+        };
+        const fakeVideoInput2: MediaDeviceInfo = {
+            deviceId: "v2",
+            groupId: "v2",
+            label: "Othercam",
+            kind: "videoinput",
+            toJSON: () => {},
+        };
+        const fakeAudioInput1: MediaDeviceInfo = {
+            deviceId: "v1",
+            groupId: "v1",
+            label: "Headphones",
+            kind: "audioinput",
+            toJSON: () => {},
+        };
+        const fakeAudioInput2: MediaDeviceInfo = {
+            deviceId: "v2",
+            groupId: "v2",
+            label: "Tailphones",
+            kind: "audioinput",
+            toJSON: () => {},
+        };
+
         it("hide when no devices are available", async () => {
             await renderView();
             expect(screen.queryByRole("button", { name: /microphone/ })).toBe(null);
@@ -194,13 +223,7 @@ describe("CallLobby", () => {
         });
 
         it("show without dropdown when only one device is available", async () => {
-            mocked(navigator.mediaDevices.enumerateDevices).mockResolvedValue([{
-                deviceId: "1",
-                groupId: "1",
-                label: "Webcam",
-                kind: "videoinput",
-                toJSON: () => {},
-            }]);
+            mocked(navigator.mediaDevices.enumerateDevices).mockResolvedValue([fakeVideoInput1]);
 
             await renderView();
             screen.getByRole("button", { name: /camera/ });
@@ -209,27 +232,40 @@ describe("CallLobby", () => {
 
         it("show with dropdown when multiple devices are available", async () => {
             mocked(navigator.mediaDevices.enumerateDevices).mockResolvedValue([
-                {
-                    deviceId: "1",
-                    groupId: "1",
-                    label: "Headphones",
-                    kind: "audioinput",
-                    toJSON: () => {},
-                },
-                {
-                    deviceId: "2",
-                    groupId: "1",
-                    label: "", // Should fall back to "Audio input 2"
-                    kind: "audioinput",
-                    toJSON: () => {},
-                },
+                fakeAudioInput1, fakeAudioInput2,
             ]);
 
             await renderView();
             screen.getByRole("button", { name: /microphone/ });
             fireEvent.click(screen.getByRole("button", { name: "Audio devices" }));
             screen.getByRole("menuitem", { name: "Headphones" });
-            screen.getByRole("menuitem", { name: "Audio input 2" });
+            screen.getByRole("menuitem", { name: "Tailphones" });
+        });
+
+        it("sets video device when selected", async () => {
+            mocked(navigator.mediaDevices.enumerateDevices).mockResolvedValue([
+                fakeVideoInput1, fakeVideoInput2,
+            ]);
+
+            await renderView();
+            screen.getByRole("button", { name: /camera/ });
+            fireEvent.click(screen.getByRole("button", { name: "Video devices" }));
+            fireEvent.click(screen.getByRole("menuitem", { name: fakeVideoInput2.label }));
+
+            expect(client.getMediaHandler().setVideoInput).toHaveBeenCalledWith(fakeVideoInput2.deviceId);
+        });
+
+        it("sets audio device when selected", async () => {
+            mocked(navigator.mediaDevices.enumerateDevices).mockResolvedValue([
+                fakeAudioInput1, fakeAudioInput2,
+            ]);
+
+            await renderView();
+            screen.getByRole("button", { name: /microphone/ });
+            fireEvent.click(screen.getByRole("button", { name: "Audio devices" }));
+            fireEvent.click(screen.getByRole("menuitem", { name: fakeAudioInput2.label }));
+
+            expect(client.getMediaHandler().setAudioInput).toHaveBeenCalledWith(fakeAudioInput2.deviceId);
         });
     });
 });
diff --git a/test/models/Call-test.ts b/test/models/Call-test.ts
index 2ed56e3fcc..83c0456b80 100644
--- a/test/models/Call-test.ts
+++ b/test/models/Call-test.ts
@@ -616,8 +616,8 @@ describe("ElementCall", () => {
             await call.connect();
             expect(call.connectionState).toBe(ConnectionState.Connected);
             expect(messaging.transport.send).toHaveBeenCalledWith(ElementWidgetActions.JoinCall, {
-                audioInput: "1",
-                videoInput: "2",
+                audioInput: "Headphones",
+                videoInput: "Built-in webcam",
             });
         });
 
diff --git a/test/test-utils/test-utils.ts b/test/test-utils/test-utils.ts
index 67c038502f..0647f2604d 100644
--- a/test/test-utils/test-utils.ts
+++ b/test/test-utils/test-utils.ts
@@ -34,6 +34,7 @@ import {
 } from 'matrix-js-sdk/src/matrix';
 import { normalize } from "matrix-js-sdk/src/utils";
 import { ReEmitter } from "matrix-js-sdk/src/ReEmitter";
+import { MediaHandler } from "matrix-js-sdk/src/webrtc/mediaHandler";
 
 import { MatrixClientPeg as peg } from '../../src/MatrixClientPeg';
 import { makeType } from "../../src/utils/TypeUtils";
@@ -175,6 +176,11 @@ export function createTestClient(): MatrixClient {
         sendToDevice: jest.fn().mockResolvedValue(undefined),
         queueToDevice: jest.fn().mockResolvedValue(undefined),
         encryptAndSendToDevices: jest.fn().mockResolvedValue(undefined),
+
+        getMediaHandler: jest.fn().mockReturnValue({
+            setVideoInput: jest.fn(),
+            setAudioInput: jest.fn(),
+        } as unknown as MediaHandler),
     } as unknown as MatrixClient;
 
     client.reEmitter = new ReEmitter(client);