diff --git a/src/analytics/OtelPosthogExporter.ts b/src/analytics/OtelPosthogExporter.ts index 245d874..f3ed167 100644 --- a/src/analytics/OtelPosthogExporter.ts +++ b/src/analytics/OtelPosthogExporter.ts @@ -15,9 +15,12 @@ limitations under the License. */ import { SpanExporter, ReadableSpan } from "@opentelemetry/sdk-trace-base"; -import { ExportResult, ExportResultCode } from "@opentelemetry/core"; +import { + ExportResult, + ExportResultCode, + hrTimeToMilliseconds, +} from "@opentelemetry/core"; import { logger } from "matrix-js-sdk/src/logger"; -import { HrTime } from "@opentelemetry/api"; import { PosthogAnalytics } from "./PosthogAnalytics"; @@ -26,10 +29,6 @@ interface PrevCall { hangupTs: number; } -function hrTimeToMs(time: HrTime): number { - return time[0] * 1000 + time[1] * 0.000001; -} - /** * The maximum time between hanging up and joining the same call that we would * consider a 'rejoin' on the user's part. @@ -88,14 +87,14 @@ export class PosthogSpanExporter implements SpanExporter { const prevCall = this.prevCall; const newPrevCall = (this.prevCall = { callId: span.attributes["matrix.confId"] as string, - hangupTs: hrTimeToMs(span.endTime), + hangupTs: hrTimeToMilliseconds(span.endTime), }); // If the user joined the same call within a short time frame, log this as a // rejoin. This is interesting as a call quality metric, since rejoins may // indicate that users had to intervene to make the product work. if (prevCall !== null && newPrevCall.callId === prevCall.callId) { - const duration = hrTimeToMs(span.startTime) - prevCall.hangupTs; + const duration = hrTimeToMilliseconds(span.startTime) - prevCall.hangupTs; if (duration <= maxRejoinMs) { PosthogAnalytics.instance.trackEvent( { diff --git a/src/analytics/RageshakeSpanExporter.ts b/src/analytics/RageshakeSpanExporter.ts new file mode 100644 index 0000000..249ff25 --- /dev/null +++ b/src/analytics/RageshakeSpanExporter.ts @@ -0,0 +1,103 @@ +import { Attributes } from "@opentelemetry/api"; +import { + ExportResult, + ExportResultCode, + hrTimeToMilliseconds, +} from "@opentelemetry/core"; +import { SpanExporter, ReadableSpan } from "@opentelemetry/sdk-trace-base"; + +const dumpAttributes = (attr: Attributes) => + Object.entries(attr).map(([key, value]) => ({ + key, + type: typeof value, + value, + })); + +/** + * Exports spans on demand to the Jaeger JSON format, which can be attached to + * rageshakes and loaded into analysis tools like Jaeger and Stalk. + */ +export class RageshakeSpanExporter implements SpanExporter { + private readonly spans: ReadableSpan[] = []; + + export( + spans: ReadableSpan[], + resultCallback: (result: ExportResult) => void + ): void { + this.spans.push(...spans); + resultCallback({ code: ExportResultCode.SUCCESS }); + } + + /** + * Dumps the spans collected so far as Jaeger-compatible JSON. + */ + public dump(): string { + const traces = new Map(); + + // Organize spans by their trace IDs + for (const span of this.spans) { + const traceId = span.spanContext().traceId; + let trace = traces.get(traceId); + + if (trace === undefined) { + trace = []; + traces.set(traceId, trace); + } + + trace.push(span); + } + + const processId = "p1"; + const processes = { + [processId]: { + serviceName: "element-call", + tags: [], + }, + warnings: null, + }; + + return JSON.stringify({ + // Honestly not sure what some of these fields mean, I just know that + // they're present in Jaeger JSON exports + total: 0, + limit: 0, + offset: 0, + errors: null, + data: [...traces.entries()].map(([traceId, spans]) => ({ + traceID: traceId, + warnings: null, + processes, + spans: spans.map((span) => { + const ctx = span.spanContext(); + + return { + traceID: traceId, + spanID: ctx.spanId, + operationName: span.name, + processID: processId, + warnings: null, + startTime: hrTimeToMilliseconds(span.startTime), + duration: hrTimeToMilliseconds(span.duration), + references: + span.parentSpanId === undefined + ? [] + : [ + { + refType: "CHILD_OF", + traceID: traceId, + spanID: span.parentSpanId, + }, + ], + tags: dumpAttributes(span.attributes), + logs: span.events.map((event) => ({ + timestamp: hrTimeToMilliseconds(event.time), + fields: dumpAttributes(event.attributes ?? {}), + })), + }; + }), + })), + }); + } + + async shutdown(): Promise {} +} diff --git a/src/otel/otel.ts b/src/otel/otel.ts index 4c92d5c..a73c6a0 100644 --- a/src/otel/otel.ts +++ b/src/otel/otel.ts @@ -28,6 +28,7 @@ import { logger } from "matrix-js-sdk/src/logger"; import { PosthogSpanExporter } from "../analytics/OtelPosthogExporter"; import { Anonymity } from "../analytics/PosthogAnalytics"; import { Config } from "../config/Config"; +import { RageshakeSpanExporter } from "../analytics/RageshakeSpanExporter"; const SERVICE_NAME = "element-call"; @@ -37,21 +38,24 @@ export class ElementCallOpenTelemetry { private _provider: WebTracerProvider; private _tracer: Tracer; private _anonymity: Anonymity; - private _otlpExporter: OTLPTraceExporter; + private otlpExporter: OTLPTraceExporter; + public readonly rageshakeExporter?: RageshakeSpanExporter; static globalInit(): void { + const config = Config.get(); // we always enable opentelemetry in general. We only enable the OTLP // collector if a URL is defined (and in future if another setting is defined) // The posthog exporteer is always enabled, posthog reporting is enabled or disabled // within the posthog code. - const shouldEnableOtlp = Boolean(Config.get().opentelemetry?.collector_url); + const shouldEnableOtlp = Boolean(config.opentelemetry?.collector_url); if (!sharedInstance || sharedInstance.isOtlpEnabled !== shouldEnableOtlp) { logger.info("(Re)starting OpenTelemetry debug reporting"); sharedInstance?.dispose(); sharedInstance = new ElementCallOpenTelemetry( - Config.get().opentelemetry?.collector_url + config.opentelemetry?.collector_url, + config.rageshake?.submit_url ); } } @@ -60,7 +64,10 @@ export class ElementCallOpenTelemetry { return sharedInstance; } - constructor(collectorUrl: string | undefined) { + constructor( + collectorUrl: string | undefined, + rageshakeUrl: string | undefined + ) { // This is how we can make Jaeger show a reaonsable service in the dropdown on the left. const providerConfig = { resource: new Resource({ @@ -71,15 +78,24 @@ export class ElementCallOpenTelemetry { if (collectorUrl) { logger.info("Enabling OTLP collector with URL " + collectorUrl); - this._otlpExporter = new OTLPTraceExporter({ + this.otlpExporter = new OTLPTraceExporter({ url: collectorUrl, }); this._provider.addSpanProcessor( - new SimpleSpanProcessor(this._otlpExporter) + new SimpleSpanProcessor(this.otlpExporter) ); } else { logger.info("OTLP collector disabled"); } + + if (rageshakeUrl) { + logger.info("Enabling rageshake collector"); + this.rageshakeExporter = new RageshakeSpanExporter(); + this._provider.addSpanProcessor( + new SimpleSpanProcessor(this.rageshakeExporter) + ); + } + const consoleExporter = new ConsoleSpanExporter(); const posthogExporter = new PosthogSpanExporter(); @@ -99,7 +115,7 @@ export class ElementCallOpenTelemetry { } public get isOtlpEnabled(): boolean { - return Boolean(this._otlpExporter); + return Boolean(this.otlpExporter); } public get tracer(): Tracer { diff --git a/src/settings/submit-rageshake.ts b/src/settings/submit-rageshake.ts index d2a9ccf..a41b886 100644 --- a/src/settings/submit-rageshake.ts +++ b/src/settings/submit-rageshake.ts @@ -25,6 +25,14 @@ import { useClient } from "../ClientContext"; import { InspectorContext } from "../room/GroupCallInspector"; import { useModalTriggerState } from "../Modal"; import { Config } from "../config/Config"; +import { ElementCallOpenTelemetry } from "../otel/otel"; + +const gzip = (text: string): Blob => { + // encode as UTF-8 + const buf = new TextEncoder().encode(text); + // compress + return new Blob([pako.gzip(buf)]); +}; interface RageShakeSubmitOptions { sendLogs: boolean; @@ -235,14 +243,15 @@ export function useSubmitRageshake(): { const logs = await getLogsForReport(); for (const entry of logs) { - // encode as UTF-8 - let buf = new TextEncoder().encode(entry.lines); - // compress - buf = pako.gzip(buf); - - body.append("compressed-log", new Blob([buf]), entry.id); + body.append("compressed-log", gzip(entry.lines), entry.id); } + body.append( + "file", + gzip(ElementCallOpenTelemetry.instance.rageshakeExporter!.dump()), + "traces.json" + ); + if (inspectorState) { body.append( "file",