diff --git a/src/media-utils.ts b/src/media-utils.ts
new file mode 100644
index 0000000..e0841cc
--- /dev/null
+++ b/src/media-utils.ts
@@ -0,0 +1,71 @@
+/*
+Copyright 2022 Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import { logger } from "matrix-js-sdk/src/logger";
+
+/**
+ * Finds the ID of the media device of the given kind labelled 'deviceName'
+ * @param deviceName The label of the device to look for
+ * @param kind The kind of device to match (e.g. "audioinput")
+ * @param devices The list of devices to search
+ * @returns The matching device's deviceId, or undefined if none was found
+ */
+export async function findDeviceByName(
+  deviceName: string,
+  kind: MediaDeviceKind,
+  devices: readonly MediaDeviceInfo[]
+): Promise<string | undefined> {
+  const isMatch = (d: MediaDeviceInfo): boolean =>
+    d.kind === kind && d.label === deviceName;
+  return devices.find(isMatch)?.deviceId;
+}
+
+/**
+ * Gets the available audio input/output and video input devices from the
+ * browser: a wrapper around mediaDevices.enumerateDevices() that requests a
+ * stream and holds it while calling enumerateDevices(). This is because some
+ * browsers (Firefox) only return device labels when the app has an active
+ * user media stream. In Chrome, this will get a stream from the default
+ * camera which can mean, for example, that the light for the FaceTime camera
+ * turns on briefly even if you selected another camera. Once the Permissions
+ * API (https://developer.mozilla.org/en-US/docs/Web/API/Permissions_API) is
+ * ready for primetime, this should allow us to avoid this.
+ *
+ * @returns The available media devices, or an empty list if enumeration fails
+ * @throws Whatever getUserMedia() rejects with if no stream can be obtained
+ */
+export async function getDevices(): Promise<MediaDeviceInfo[]> {
+  let stream: MediaStream;
+  try {
+    stream = await navigator.mediaDevices.getUserMedia({
+      audio: true,
+      video: true,
+    });
+  } catch (e) {
+    logger.info("Couldn't get media stream for enumerateDevices: failing");
+    throw e;
+  }
+  try {
+    return await navigator.mediaDevices.enumerateDevices();
+  } catch (error) {
+    logger.warn("Unable to refresh WebRTC Devices: ", error);
+    // Best effort: hand back an empty list rather than an implicit undefined
+    return [];
+  } finally {
+    // Always release the stream so the camera/microphone are not left active
+    stream.getTracks().forEach((track) => track.stop());
+  }
+}
diff --git a/src/room/GroupCallView.tsx b/src/room/GroupCallView.tsx
index 0ee6cfb..b777444 100644
--- a/src/room/GroupCallView.tsx
+++ b/src/room/GroupCallView.tsx
@@ -18,6 +18,7 @@ import React, { useCallback, useEffect, useState } from "react";
 import { useHistory } from "react-router-dom";
 import { GroupCall, GroupCallState } from "matrix-js-sdk/src/webrtc/groupCall";
 import { MatrixClient } from "matrix-js-sdk/src/client";
+import { logger } from "matrix-js-sdk/src/logger";
 
 import type { IWidgetApiRequest } from "matrix-widget-api";
 import { widget, ElementWidgetActions, JoinCallData } from "../widget";
@@ -31,6 +32,7 @@ import { useRoomAvatar } from "./useRoomAvatar";
 import { useSentryGroupCallHandler } from "./useSentryGroupCallHandler";
 import { useLocationNavigation } from "../useLocationNavigation";
 import { useMediaHandler } from "../settings/useMediaHandler";
+import { findDeviceByName, getDevices } from "../media-utils";
 
 declare global {
   interface Window {
@@ -94,10 +96,45 @@ export function GroupCallView({
     if (widget && preload) {
       // In preload mode, wait for a join action before entering
       const onJoin = async (ev: CustomEvent<IWidgetApiRequest>) => {
+        // Get the available devices so we can match the selected device
+        // to its ID. This involves getting a media stream (see docs on
+        // the function) so we only do it once and re-use the result.
+        const devices = await getDevices();
+
         const { audioInput, videoInput } = ev.detail
           .data as unknown as JoinCallData;
-        if (audioInput !== null) setAudioInput(audioInput);
-        if (videoInput !== null) setVideoInput(videoInput);
+
+        if (audioInput !== null) {
+          const deviceId = await findDeviceByName(
+            audioInput,
+            "audioinput",
+            devices
+          );
+          if (!deviceId) {
+            logger.warn("Unknown audio input: " + audioInput);
+          } else {
+            logger.debug(
+              `Found audio input ID ${deviceId} for name ${audioInput}`
+            );
+            setAudioInput(deviceId);
+          }
+        }
+
+        if (videoInput !== null) {
+          const deviceId = await findDeviceByName(
+            videoInput,
+            "videoinput",
+            devices
+          );
+          if (!deviceId) {
+            logger.warn("Unknown video input: " + videoInput);
+          } else {
+            logger.debug(
+              `Found video input ID ${deviceId} for name ${videoInput}`
+            );
+            setVideoInput(deviceId);
+          }
+        }
         await Promise.all([
           groupCall.setMicrophoneMuted(audioInput === null),
           groupCall.setLocalVideoMuted(videoInput === null),