Make spatial audio Firefox-only

Hopefully explained in comment: we have a heisenbug where we sometimes
lack audio from a certain participant, so this simplifies the audio
path by removing the workaround required to do AEC with spatial audio
on chrome.
This commit is contained in:
David Baker 2022-12-16 17:12:17 +00:00
parent f9845617b3
commit 223793a445
11 changed files with 94 additions and 208 deletions

View file

@ -151,6 +151,15 @@
margin-right: 10px;
}
.checkboxField.disabled,
.checkboxField.disabled .description {
color: var(--quinary-content);
}
.checkboxField.disabled .checkbox {
border-color: var(--quinary-content);
}
.checkbox svg {
display: none;
}

View file

@ -50,19 +50,19 @@ import { Avatar } from "../Avatar";
import { UserMenuContainer } from "../UserMenuContainer";
import { useRageshakeRequestModal } from "../settings/submit-rageshake";
import { RageshakeRequestModal } from "./RageshakeRequestModal";
import { useMediaHandler } from "../settings/useMediaHandler";
import { useShowInspector, useSpatialAudio } from "../settings/useSetting";
import { useModalTriggerState } from "../Modal";
import { useAudioContext } from "../video-grid/useMediaStream";
import { useFullscreen } from "../video-grid/useFullscreen";
import { AudioContainer } from "../video-grid/AudioContainer";
import { useAudioOutputDevice } from "../video-grid/useAudioOutputDevice";
import { PosthogAnalytics } from "../PosthogAnalytics";
import { widget, ElementWidgetActions } from "../widget";
import { useJoinRule } from "./useJoinRule";
import { useUrlParams } from "../UrlParams";
import { usePrefersReducedMotion } from "../usePrefersReducedMotion";
import { ConnectionState, ParticipantInfo } from "./useGroupCall";
import { TileDescriptor } from "../video-grid/TileDescriptor";
import { ParticipantInfo } from "./useGroupCall";
import { AudioSink } from "../video-grid/AudioSink";
import { useMediaHandler } from "../settings/useMediaHandler";
const canScreenshare = "getDisplayMedia" in (navigator.mediaDevices ?? {});
// There is currently a bug in Safari or our code with cloning and sending MediaStreams
@ -91,18 +91,6 @@ interface Props {
hideHeader: boolean;
}
// Represents something that should get a tile on the layout,
// ie. a user's video feed or a screen share feed.
export interface TileDescriptor {
id: string;
member: RoomMember;
focused: boolean;
presenter: boolean;
callFeed?: CallFeed;
isLocal?: boolean;
connectionState: ConnectionState;
}
export function InCallView({
client,
groupCall,
@ -145,15 +133,12 @@ export function InCallView({
const [spatialAudio] = useSpatialAudio();
const [audioContext, audioDestination, audioRef] = useAudioContext();
const { audioOutput } = useMediaHandler();
const [audioContext, audioDestination] = useAudioContext();
const [showInspector] = useShowInspector();
const { modalState: feedbackModalState, modalProps: feedbackModalProps } =
useModalTriggerState();
useAudioOutputDevice(audioRef, audioOutput);
const { hideScreensharing } = useUrlParams();
useEffect(() => {
@ -347,16 +332,30 @@ export function InCallView({
[styles.maximised]: maximisedParticipant,
});
// If spatial audio is disabled, we render one audio tag for each participant
// (with spatial audio, all the audio goes via the Web Audio API)
// We also do this if there's a feed maximised because we only trigger spatial
// audio rendering for feeds that we're displaying, which will need to be fixed
// once we start having more participants than we can fit on a screen, but this
// is a workaround for now.
const { audioOutput } = useMediaHandler();
const audioElements: JSX.Element[] = [];
if (!spatialAudio || maximisedParticipant) {
for (const item of items) {
if (item.isLocal) continue; // We don't want to render own audio
audioElements.push(
<AudioSink
tileDescriptor={item}
audioOutput={audioOutput}
key={item.id}
/>
);
}
}
return (
<div className={containerClasses} ref={containerRef}>
<audio ref={audioRef} />
{(!spatialAudio || maximisedParticipant) && (
<AudioContainer
items={items}
audioContext={audioContext}
audioDestination={audioDestination}
/>
)}
<>{audioElements}</>
{!hideHeader && !maximisedParticipant && (
<Header>
<LeftNav>

View file

@ -32,6 +32,7 @@ import {
useSpatialAudio,
useShowInspector,
useOptInAnalytics,
canEnableSpatialAudio,
} from "./useSetting";
import { FieldRow, InputField } from "../input/Input";
import { Button } from "../button";
@ -115,9 +116,14 @@ export const SettingsModal = (props: Props) => {
label={t("Spatial audio")}
type="checkbox"
checked={spatialAudio}
description={t(
"This will make a speaker's audio seem as if it is coming from where their tile is positioned on screen. (Experimental feature: this may impact the stability of audio.)"
)}
disabled={!canEnableSpatialAudio()}
description={
canEnableSpatialAudio()
? t(
"This will make a speaker's audio seem as if it is coming from where their tile is positioned on screen. (Experimental feature: this may impact the stability of audio.)"
)
: t("This feature is only supported on Firefox.")
}
onChange={(event: React.ChangeEvent<HTMLInputElement>) =>
setSpatialAudio(event.target.checked)
}

View file

@ -58,7 +58,26 @@ export const getSetting = <T>(name: string, defaultValue: T): T => {
return item === null ? defaultValue : JSON.parse(item);
};
export const useSpatialAudio = () => useSetting("spatial-audio", false);
// Spatial audio routes participant audio through an AudioContext. On Chrome
// this bypasses the AEC processor and therefore breaks echo cancellation, so
// we restrict the feature to Firefox, which we know passes audio-context
// output through its AEC algorithm.
// The Chrome bug for this is
// https://bugs.chromium.org/p/chromium/issues/detail?id=687574: once it is
// fixed and the updated version is deployed widely enough, we can allow
// spatial audio everywhere. It's currently behind a chrome flag, so we could
// enable this in Electron if we enabled the chrome flag in the Electron
// wrapper.
export const canEnableSpatialAudio = () => {
  return navigator.userAgent.includes("Firefox");
};
export const useSpatialAudio = (): [boolean, (val: boolean) => void] => {
const settingVal = useSetting("spatial-audio", false);
if (canEnableSpatialAudio()) return settingVal;
return [false, (_: boolean) => {}];
};
export const useShowInspector = () => useSetting("show-inspector", false);
export const useOptInAnalytics = () => useSetting("opt-in-analytics", false);
export const useKeyboardShortcuts = () =>

View file

@ -1,96 +0,0 @@
/*
Copyright 2022 New Vector Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import React, { FC, useEffect, useRef } from "react";
import { TileDescriptor } from "../room/InCallView";
import { useCallFeed } from "./useCallFeed";
import { useMediaStreamTrackCount } from "./useMediaStream";
// XXX: These in fact do not render anything but to my knowledge this is the
// only way to run a hook on each element of an array
// Props for the hidden per-participant audio renderer.
interface AudioForParticipantProps {
  item: TileDescriptor;
  audioContext: AudioContext;
  audioDestination: AudioNode;
}

// Pipes one remote participant's audio through the shared Web Audio graph:
// stream -> MediaStreamAudioSourceNode -> GainNode -> audioDestination.
// Renders nothing; it exists purely so hooks can run per participant.
export const AudioForParticipant: FC<AudioForParticipantProps> = ({
  item,
  audioContext,
  audioDestination,
}) => {
  const { stream, localVolume } = useCallFeed(item.callFeed);
  const [audioTrackCount] = useMediaStreamTrackCount(stream);

  // Created lazily and kept across effect re-runs so the audio graph nodes
  // are reused rather than rebuilt whenever a dependency changes.
  const gainNodeRef = useRef<GainNode>();
  const sourceRef = useRef<MediaStreamAudioSourceNode>();

  useEffect(() => {
    // We don't compare the audioMuted flag of useCallFeed here, since unmuting
    // depends on to-device messages which may lag behind the audio actually
    // starting to flow over the network
    if (!item.isLocal && audioContext && audioTrackCount > 0) {
      if (!gainNodeRef.current) {
        gainNodeRef.current = new GainNode(audioContext, {
          gain: localVolume,
        });
      }
      if (!sourceRef.current) {
        sourceRef.current = audioContext.createMediaStreamSource(stream);
      }

      const source = sourceRef.current;
      const gainNode = gainNodeRef.current;

      // Keep the gain in sync with the user's per-participant volume setting
      gainNode.gain.value = localVolume;
      source.connect(gainNode).connect(audioDestination);

      // Only disconnect on cleanup; the nodes themselves are kept for reuse
      return () => {
        source.disconnect();
        gainNode.disconnect();
      };
    }
  }, [
    item,
    audioContext,
    audioDestination,
    stream,
    localVolume,
    audioTrackCount,
  ]);

  return null;
};
// Props for the container that mounts one AudioForParticipant per remote
// participant.
interface AudioContainerProps {
  items: TileDescriptor[];
  audioContext: AudioContext;
  audioDestination: AudioNode;
}

// Mounts a (non-rendering) AudioForParticipant for every non-local tile,
// passing the shared audio context and destination node through to each.
export const AudioContainer: FC<AudioContainerProps> = ({
  items,
  audioContext,
  audioDestination,
}) => {
  const remoteItems = items.filter((item) => !item.isLocal);
  return (
    <>
      {remoteItems.map((item) => (
        <AudioForParticipant
          key={item.id}
          item={item}
          audioContext={audioContext}
          audioDestination={audioDestination}
        />
      ))}
    </>
  );
};

View file

@ -21,8 +21,8 @@ import { RoomMember } from "matrix-js-sdk";
import { VideoGrid, useVideoGridLayout } from "./VideoGrid";
import { VideoTile } from "./VideoTile";
import { Button } from "../button";
import { TileDescriptor } from "../room/InCallView";
import { ConnectionState } from "../room/useGroupCall";
import { TileDescriptor } from "./TileDescriptor";
export default {
title: "VideoGrid",

View file

@ -23,7 +23,7 @@ import { ReactDOMAttributes } from "@use-gesture/react/dist/declarations/src/typ
import styles from "./VideoGrid.module.css";
import { Layout } from "../room/GridLayoutMenu";
import { TileDescriptor } from "../room/InCallView";
import { TileDescriptor } from "./TileDescriptor";
interface TilePosition {
x: number;

View file

@ -25,7 +25,7 @@ import { useRoomMemberName } from "./useRoomMemberName";
import { VideoTile } from "./VideoTile";
import { VideoTileSettingsModal } from "./VideoTileSettingsModal";
import { useModalTriggerState } from "../Modal";
import { TileDescriptor } from "../room/InCallView";
import { TileDescriptor } from "./TileDescriptor";
interface Props {
item: TileDescriptor;
@ -72,7 +72,7 @@ export function VideoTileContainer({
audioContext,
audioDestination,
localVolume,
isLocal || maximised
isLocal
);
const {
modalState: videoTileSettingsModalState,

View file

@ -16,6 +16,8 @@ limitations under the License.
import { RefObject, useEffect } from "react";
// Uses setSinkId on an audio output element to set the device it outputs to,
// where supported by the browser.
export function useAudioOutputDevice(
mediaRef: RefObject<MediaElement>,
audioOutputDevice: string | undefined

View file

@ -17,8 +17,8 @@ limitations under the License.
import { useCallback, useEffect, useState } from "react";
import { TileDescriptor } from "../room/InCallView";
import { useEventTarget } from "../useEvents";
import { TileDescriptor } from "./TileDescriptor";
import { useCallFeed } from "./useCallFeed";
export function useFullscreen(ref: React.RefObject<HTMLElement>): {

View file

@ -15,7 +15,6 @@ limitations under the License.
*/
import { useRef, useEffect, RefObject, useState, useCallback } from "react";
import { parse as parseSdp, write as writeSdp } from "sdp-transform";
import {
acquireContext,
releaseContext,
@ -64,6 +63,8 @@ export const useMediaStreamTrackCount = (
return [audioTrackCount, videoTrackCount];
};
// Binds a media stream to a media output element, returning a ref that
// should be attached to the media element that will play the stream.
export const useMediaStream = (
stream: MediaStream | null,
audioOutputDevice: string | null,
@ -78,7 +79,7 @@ export const useMediaStream = (
console.log(
`useMediaStream update stream mediaRef.current ${!!mediaRef.current} stream ${
stream && stream.id
}`
} muted ${mute}`
);
if (mediaRef.current) {
@ -127,89 +128,30 @@ export const useMediaStream = (
return mediaRef;
};
// Loops the given audio stream back through a pair of local peer connections,
// to make AEC work with Web Audio streams on Chrome: audio that has traversed
// an RTCPeerConnection does get echo-cancelled. The resulting stream should
// be played through an audio element.
// This hack can be removed once the following bug is resolved:
// https://bugs.chromium.org/p/chromium/issues/detail?id=687574
const createLoopback = async (stream: MediaStream): Promise<MediaStream> => {
  // Two local peer connections that talk only to each other
  const sender = new RTCPeerConnection();
  const receiver = new RTCPeerConnection();
  const loopbackStream = new MediaStream();

  // Exchange ICE candidates directly between the two ends
  sender.addEventListener("icecandidate", ({ candidate }) => {
    if (candidate) receiver.addIceCandidate(new RTCIceCandidate(candidate));
  });
  receiver.addEventListener("icecandidate", ({ candidate }) => {
    if (candidate) sender.addIceCandidate(new RTCIceCandidate(candidate));
  });

  // Collect whatever arrives at the far end into the output stream
  receiver.addEventListener("track", ({ track }) =>
    loopbackStream.addTrack(track)
  );

  // Feed the input stream's tracks into the sending side
  for (const track of stream.getTracks()) {
    sender.addTrack(track);
  }

  // Standard offer/answer negotiation between the two connections
  const offer = await sender.createOffer({
    offerToReceiveAudio: false,
    offerToReceiveVideo: false,
  });
  await sender.setLocalDescription(offer);
  await receiver.setRemoteDescription(offer);

  const answer = await receiver.createAnswer();

  // Rewrite the answer SDP to be stereo and (variable) max bitrate
  const parsedSdp = parseSdp(answer.sdp!);
  for (const media of parsedSdp.media) {
    for (const fmtp of media.fmtp) {
      fmtp.config += `;stereo=1;cbr=0;maxaveragebitrate=510000;`;
    }
  }
  answer.sdp = writeSdp(parsedSdp);

  await receiver.setLocalDescription(answer);
  await sender.setRemoteDescription(answer);

  return loopbackStream;
};
export const useAudioContext = (): [
AudioContext,
AudioNode,
RefObject<MediaElement>
] => {
// Provides a properly refcounted instance of the shared audio context,
// along with the context's destination audio node and a ref to be used
// for the <audio> sink element.
export const useAudioContext = (): [AudioContext, AudioNode] => {
const context = useRef<AudioContext>();
const destination = useRef<AudioNode>();
const audioRef = useRef<MediaElement>();
useEffect(() => {
if (audioRef.current && !context.current) {
if (!context.current) {
context.current = acquireContext();
if (window.chrome) {
// We're in Chrome, which needs a loopback hack applied to enable AEC
const streamDest = context.current.createMediaStreamDestination();
destination.current = streamDest;
const audioEl = audioRef.current;
(async () => {
audioEl.srcObject = await createLoopback(streamDest.stream);
await audioEl.play();
})();
return () => {
audioEl.srcObject = null;
releaseContext();
};
} else {
destination.current = context.current.destination;
return releaseContext;
}
destination.current = context.current.destination;
return releaseContext;
}
}, []);
return [context.current!, destination.current!, audioRef];
return [context.current!, destination.current!];
};
// Either renders a media stream with spatial audio or is just a no-op wrapper
// around useMediaStream, depending on whether spatial audio is enabled.
// Returns refs for the tile element from which the position is derived and
// a <video> element to render the video to.
// (hooks can't be conditional so we must use the same hook in each case).
export const useSpatialMediaStream = (
stream: MediaStream | null,
audioContext: AudioContext,
@ -219,7 +161,12 @@ export const useSpatialMediaStream = (
): [RefObject<HTMLDivElement>, RefObject<MediaElement>] => {
const tileRef = useRef<HTMLDivElement | null>(null);
const [spatialAudio] = useSpatialAudio();
// We always handle audio separately from the video element
// This media stream is only used for the video - the audio goes via the audio
// context, so the audio output doesn't matter and the element is always muted
// (we could split the video out into a separate stream with just the video track
// and pass that as the srcObject of the element, but it seems unnecessary when we
// can just mute the element).
const mediaRef = useMediaStream(stream, null, true);
const [audioTrackCount] = useMediaStreamTrackCount(stream);