From 0cca5ae1741f672a674a5a258bcc4afcdb33a177 Mon Sep 17 00:00:00 2001 From: David Baker Date: Wed, 15 Mar 2023 14:35:10 +0000 Subject: [PATCH] Slightly evolved but not-yet-working OpenTelemetry More usefully, including docker config for starting a CORS enabled OTLP collector so we don't have to use zipkin. --- config/otel_dev/README.md | 7 + config/otel_dev/collector-gateway.yaml | 38 +++++ config/otel_dev/docker-compose.yaml | 23 +++ src/otel/OTelGroupCallMembership.ts | 206 +++++++++++++++++++++++++ src/otel/otel.ts | 96 ++++++++++++ 5 files changed, 370 insertions(+) create mode 100644 config/otel_dev/README.md create mode 100644 config/otel_dev/collector-gateway.yaml create mode 100644 config/otel_dev/docker-compose.yaml create mode 100644 src/otel/OTelGroupCallMembership.ts create mode 100644 src/otel/otel.ts diff --git a/config/otel_dev/README.md b/config/otel_dev/README.md new file mode 100644 index 0000000..19bf00d --- /dev/null +++ b/config/otel_dev/README.md @@ -0,0 +1,7 @@ +# OpenTelemetry Collector for development + +This directory contains a docker compose file that starts a Jaeger all-in-one instance +with an in-memory database, along with a standalone OpenTelemetry collector that forwards +traces into Jaeger. Jaeger has a built-in OpenTelemetry collector, but it can't be +configured to send CORS headers so can't be used from a browser. This sets the config on +the collector to send CORS headers. 
diff --git a/config/otel_dev/collector-gateway.yaml b/config/otel_dev/collector-gateway.yaml new file mode 100644 index 0000000..2c70fb4 --- /dev/null +++ b/config/otel_dev/collector-gateway.yaml @@ -0,0 +1,38 @@ +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + cors: + allowed_origins: + - "http://*" + allowed_headers: + - "*" +processors: + batch: + timeout: 1s + resource: + attributes: + - key: test.key + value: "test-value" + action: insert +exporters: + logging: + loglevel: info + jaeger: + endpoint: jaeger-all-in-one:14250 + tls: + insecure: true +extensions: + health_check: + pprof: + endpoint: :1888 + zpages: + endpoint: :55679 +service: + extensions: [pprof, zpages, health_check] + pipelines: + traces: + receivers: [otlp] + processors: [batch, resource] + exporters: [logging, jaeger] diff --git a/config/otel_dev/docker-compose.yaml b/config/otel_dev/docker-compose.yaml new file mode 100644 index 0000000..478f80a --- /dev/null +++ b/config/otel_dev/docker-compose.yaml @@ -0,0 +1,23 @@ +version: "2" +services: + # Jaeger + jaeger-all-in-one: + image: jaegertracing/all-in-one:latest + ports: + - "16686:16686" + - "14268" + - "14250" + # Collector + collector-gateway: + image: otel/opentelemetry-collector:latest + volumes: + - ./collector-gateway.yaml:/etc/collector-gateway.yaml + command: [ "--config=/etc/collector-gateway.yaml" ] + ports: + - "1888:1888" # pprof extension + - "13133:13133" # health_check extension + - "4317:4317" # OTLP gRPC receiver + - "4318:4318" # OTLP HTTP receiver + - "55670:55679" # zpages extension + depends_on: + - jaeger-all-in-one diff --git a/src/otel/OTelGroupCallMembership.ts b/src/otel/OTelGroupCallMembership.ts new file mode 100644 index 0000000..56a8cf8 --- /dev/null +++ b/src/otel/OTelGroupCallMembership.ts @@ -0,0 +1,206 @@ +/* +Copyright 2023 New Vector Ltd + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import opentelemetry, { Context, Span } from "@opentelemetry/api"; +import { + ClientEvent, + GroupCall, + MatrixClient, + MatrixEvent, + RoomStateEvent, +} from "matrix-js-sdk"; +import { CallEvent } from "matrix-js-sdk/src/webrtc/call"; +import { useCallback, useEffect, useState } from "react"; + +import { tracer } from "./otel"; + +/** + * Represents the span of time during which we intend to be joined to a group call + */ +export class OTelGroupCallMembership { + private context: Context; + private callMembershipSpan: Span; + + constructor(private groupCall: GroupCall) { + const callIdContext = opentelemetry.context + .active() + .setValue(Symbol("confId"), groupCall.groupCallId); + + // Create the main span that tracks the time we intend to be in the call + this.callMembershipSpan = tracer.startSpan( + "otel_groupCallMembershipSpan", + undefined, + callIdContext + ); + + // Create a new context based on the active context (not callIdContext). This context also has a span assigned to it. + // Other spans can use this context to extract the parent span. + // (When passing this context to startSpan the started span will use the span set in the context (in this case the callMembershipSpan) as the parent) + this.context = opentelemetry.trace.setSpan( + opentelemetry.context.active(), + this.callMembershipSpan + ); + } + + public onJoinCall() { + // Here we start a very short span. This is a hack to trigger the posthog exporter. + // Only ended spans are processed by the exporter. 
+ // We want the exporter to know that a call has started + const joinCallSpan = tracer.startSpan( + "otel_joinCallSpan", + undefined, + this.context + ); + joinCallSpan.end(); + } + + public onLeaveCall() { + // A very short span to represent us leaving the call + const startCallSpan = tracer.startSpan( + "otel_leaveCallSpan", + undefined, + this.context + ); + startCallSpan.end(); + + // and end the main span to indicate we've left + this.callMembershipSpan.end(); + } + + public onSendStateEvent(stateEvent: MatrixEvent) {} + + public onSendToDeviceEvent(toDeviceEvent: Record) { + const eventType = toDeviceEvent.eventType as string; + if (!eventType.startsWith("m.call")) return; + + const span = tracer.startSpan( + `otel_sendToDeviceEvent_${toDeviceEvent.eventType}`, + undefined, + this.context + ); + + for (const [k, v] of Object.entries(toDeviceEvent)) { + if (["string", "number"].includes(typeof v)) + span.setAttribute(k, v as string | number); + } + } +} + +export const useCallEventInstrumentation = ( + client: MatrixClient, + groupCall: GroupCall +): void => { + const [groupCallSpan, setGroupCallSpan] = useState(); + const [groupCallId, setGroupCallId] = useState(); + + const startChildSpan = useCallback( + (name: string, groupCallId: string): Span => { + const traceId = "7b78c1f568312cb288e55a9bc3c28cc5"; + const spanId = "7d31f3e430d90882"; + + const ctx = opentelemetry.trace.setSpanContext(context.active(), { + traceId, + spanId, + traceFlags: 1, + isRemote: true, + }); + + console.log("LOG context", ctx); + console.log( + "LOG context valid", + trace.isSpanContextValid(trace.getSpan(ctx).spanContext()) + ); + console.log("LOG parent span", trace.getSpan(ctx)); + + return tracer.startSpan(name, undefined, ctx); + }, + [] + ); + + const onUpdateRoomState = useCallback((event?: MatrixEvent) => { + /*const callStateEvent = groupCall.room.currentState.getStateEvents( + "org.matrix.msc3401.call", + groupCall.groupCallId + );*/ + /*const memberStateEvents = 
groupCall.room.currentState.getStateEvents( + "org.matrix.msc3401.call.member" + );*/ + }, []); + + const onReceivedVoipEvent = (event: MatrixEvent) => {}; + + const onUndecryptableToDevice = (event: MatrixEvent) => {}; + + const onSendVoipEvent = useCallback( + (event: Record) => { + const span = startChildSpan( + `element-call:send-voip-event:${event.eventType}`, + groupCall.groupCallId + ); + span.setAttribute("groupCallId", groupCall.groupCallId); + + console.log("LOG span", span); + + span.end(); + }, + [groupCall.groupCallId, startChildSpan] + ); + + useEffect(() => { + return; + if (groupCallId === groupCall.groupCallId) return; + + console.log("LOG starting span", groupCall.groupCallId, groupCallId); + + groupCallSpan?.end(); + + const newSpan = tracer.startSpan("element-call:group-call"); + newSpan.setAttribute("groupCallId", groupCall.groupCallId); + setGroupCallSpan(newSpan); + setGroupCallId(groupCall.groupCallId); + }, [groupCallSpan, groupCallId, groupCall.groupCallId]); + + useEffect(() => () => { + console.log("LOG ending span"); + + groupCallSpan?.end(); + }); + + useEffect(() => { + client.on(RoomStateEvent.Events, onUpdateRoomState); + //groupCall.on("calls_changed", onCallsChanged); + groupCall.on(CallEvent.SendVoipEvent, onSendVoipEvent); + //client.on("state", onCallsChanged); + //client.on("hangup", onCallHangup); + client.on(ClientEvent.ReceivedVoipEvent, onReceivedVoipEvent); + client.on(ClientEvent.UndecryptableToDeviceEvent, onUndecryptableToDevice); + + onUpdateRoomState(); + + return () => { + client.removeListener(RoomStateEvent.Events, onUpdateRoomState); + //groupCall.removeListener("calls_changed", onCallsChanged); + groupCall.removeListener(CallEvent.SendVoipEvent, onSendVoipEvent); + //client.removeListener("state", onCallsChanged); + //client.removeListener("hangup", onCallHangup); + client.removeListener(ClientEvent.ReceivedVoipEvent, onReceivedVoipEvent); + client.removeListener( + ClientEvent.UndecryptableToDeviceEvent, + 
onUndecryptableToDevice + ); + }; + }, [client, groupCall, onSendVoipEvent, onUpdateRoomState]); +}; diff --git a/src/otel/otel.ts b/src/otel/otel.ts new file mode 100644 index 0000000..6f546db --- /dev/null +++ b/src/otel/otel.ts @@ -0,0 +1,96 @@ +/* document-load.ts|js file - the code is the same for both the languages */ +import { + ConsoleSpanExporter, + SimpleSpanProcessor, +} from "@opentelemetry/sdk-trace-base"; +import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http"; +import { WebTracerProvider } from "@opentelemetry/sdk-trace-web"; +import opentelemetry from "@opentelemetry/api"; +import { Context } from "@opentelemetry/api"; +import { Resource } from "@opentelemetry/resources"; +import { SemanticResourceAttributes } from "@opentelemetry/semantic-conventions"; + +import { PosthogSpanExporter } from "../analytics/OtelPosthogExporter"; + +const SERVICE_NAME = "element-call"; + +const otlpExporter = new OTLPTraceExporter(); +const consoleExporter = new ConsoleSpanExporter(); +const posthogExporter = new PosthogSpanExporter(); + +// This is how we can make Jaeger show a reasonable service name in the dropdown on the left. +const providerConfig = { + resource: new Resource({ + [SemanticResourceAttributes.SERVICE_NAME]: SERVICE_NAME, + }), +}; +const provider = new WebTracerProvider(providerConfig); + +provider.addSpanProcessor(new SimpleSpanProcessor(otlpExporter)); +provider.addSpanProcessor(new SimpleSpanProcessor(posthogExporter)); +provider.addSpanProcessor(new SimpleSpanProcessor(consoleExporter)); + +// This is the tracer name, not the serviceName shown in Jaeger. NOTE(review): `provider` is never registered in the visible code, so this tracer obtained from the global API may not be backed by it - confirm. +export const tracer = opentelemetry.trace.getTracer( + "my-element-call-otl-tracer" +); + +class CallTracer { + // We create one tracer class for each main context. 
+ // Even if different tracer classes overlap in time, we might want to visualize them separately. + // The Call Tracer should only contain spans/events that are relevant to understand the procedure of the individual candidates. 
+ // Another Tracer Class (for example a ConnectionTracer) can contain a very granular list of all steps to connect to a call. + + private callSpan; + private callContext; + private muteSpan?; + + public startGroupCall(groupCallId: string) {} + + public startCall(callId: string): Context { + // The main context will be set when initiating the main/parent span. NOTE(review): the signature declares a Context return value, but no return statement is visible in this method - confirm. + + // Create an initial context with the callId param + const callIdContext = opentelemetry.context + .active() + .setValue(Symbol("callId"), callId); + + // Create the main span that tracks the whole call + this.callSpan = tracer.startSpan("otel_callSpan", undefined, callIdContext); + + // Create a new context based on the active context (not callIdContext). This context also has a span assigned to it. + // Other spans can use this context to extract the parent span. + // (When passing this context to startSpan the started span will use the span set in the context (in this case the callSpan) as the parent) + this.callContext = opentelemetry.trace.setSpan( + opentelemetry.context.active(), + this.callSpan + ); + + // Here we start a very short span. This is a hack to trigger the posthog exporter. + // Only ended spans are processed by the exporter. + // We want the exporter to know that a call has started + const startCallSpan = tracer.startSpan( + "otel_startCallSpan", + undefined, + this.callContext + ); + startCallSpan.end(); + } + public muteMic(muteState: boolean) { + if (muteState) { + this.muteSpan = tracer.startSpan( + "otel_muteSpan", + undefined, + this.callContext + ); + } else if (this.muteSpan) { + this.muteSpan.end(); + this.muteSpan = null; + } + } + public endCall() { + this.callSpan?.end(); + } +} + +export const callTracer = new CallTracer();