Include OpenTelemetry traces in rageshakes

Robin Townsend 2023-04-11 01:09:52 -04:00
parent 6999765f39
commit 95eca18207
4 changed files with 148 additions and 21 deletions


@@ -15,9 +15,12 @@ limitations under the License.
*/
import { SpanExporter, ReadableSpan } from "@opentelemetry/sdk-trace-base";
import { ExportResult, ExportResultCode } from "@opentelemetry/core";
import {
ExportResult,
ExportResultCode,
hrTimeToMilliseconds,
} from "@opentelemetry/core";
import { logger } from "matrix-js-sdk/src/logger";
import { HrTime } from "@opentelemetry/api";
import { PosthogAnalytics } from "./PosthogAnalytics";
@@ -26,10 +29,6 @@ interface PrevCall {
hangupTs: number;
}
function hrTimeToMs(time: HrTime): number {
return time[0] * 1000 + time[1] * 0.000001;
}
/**
* The maximum time between hanging up and joining the same call that we would
* consider a 'rejoin' on the user's part.
@@ -86,14 +85,14 @@ export class PosthogSpanExporter implements SpanExporter {
const prevCall = this.prevCall;
const newPrevCall = (this.prevCall = {
callId: span.attributes["matrix.confId"] as string,
hangupTs: hrTimeToMs(span.endTime),
hangupTs: hrTimeToMilliseconds(span.endTime),
});
// If the user joined the same call within a short time frame, log this as a
// rejoin. This is interesting as a call quality metric, since rejoins may
// indicate that users had to intervene to make the product work.
if (prevCall !== null && newPrevCall.callId === prevCall.callId) {
const duration = hrTimeToMs(span.startTime) - prevCall.hangupTs;
const duration = hrTimeToMilliseconds(span.startTime) - prevCall.hangupTs;
if (duration <= maxRejoinMs) {
PosthogAnalytics.instance.trackEvent(
{

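Aside, not part of the commit: OpenTelemetry's HrTime is a [seconds, nanoseconds] tuple, and the library's hrTimeToMilliseconds performs the same conversion as the removed local hrTimeToMs helper, so the swap above preserves the reported timestamps. A minimal sketch with a hypothetical timestamp:

import { HrTime } from "@opentelemetry/api";
import { hrTimeToMilliseconds } from "@opentelemetry/core";

// HrTime is a [seconds, nanoseconds] tuple. The removed local helper computed
// seconds * 1000 + nanoseconds * 0.000001; the library function does the same
// conversion, so switching to it does not change the values sent to Posthog.
const sample: HrTime = [1681171200, 500000000]; // hypothetical span end time
const endMs = hrTimeToMilliseconds(sample); // 1681171200500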

@@ -0,0 +1,103 @@
import { Attributes } from "@opentelemetry/api";
import {
ExportResult,
ExportResultCode,
hrTimeToMilliseconds,
} from "@opentelemetry/core";
import { SpanExporter, ReadableSpan } from "@opentelemetry/sdk-trace-base";
const dumpAttributes = (attr: Attributes) =>
Object.entries(attr).map(([key, value]) => ({
key,
type: typeof value,
value,
}));
/**
* Exports spans on demand to the Jaeger JSON format, which can be attached to
* rageshakes and loaded into analysis tools like Jaeger and Stalk.
*/
export class RageshakeSpanExporter implements SpanExporter {
private readonly spans: ReadableSpan[] = [];
export(
spans: ReadableSpan[],
resultCallback: (result: ExportResult) => void
): void {
this.spans.push(...spans);
resultCallback({ code: ExportResultCode.SUCCESS });
}
/**
* Dumps the spans collected so far as Jaeger-compatible JSON.
*/
public dump(): string {
const traces = new Map<string, ReadableSpan[]>();
// Organize spans by their trace IDs
for (const span of this.spans) {
const traceId = span.spanContext().traceId;
let trace = traces.get(traceId);
if (trace === undefined) {
trace = [];
traces.set(traceId, trace);
}
trace.push(span);
}
const processId = "p1";
const processes = {
[processId]: {
serviceName: "element-call",
tags: [],
},
warnings: null,
};
return JSON.stringify({
// Honestly not sure what some of these fields mean; I just know that
// they're present in Jaeger JSON exports.
total: 0,
limit: 0,
offset: 0,
errors: null,
data: [...traces.entries()].map(([traceId, spans]) => ({
traceID: traceId,
warnings: null,
processes,
spans: spans.map((span) => {
const ctx = span.spanContext();
return {
traceID: traceId,
spanID: ctx.spanId,
operationName: span.name,
processID: processId,
warnings: null,
startTime: hrTimeToMilliseconds(span.startTime),
duration: hrTimeToMilliseconds(span.duration),
references:
span.parentSpanId === undefined
? []
: [
{
refType: "CHILD_OF",
traceID: traceId,
spanID: span.parentSpanId,
},
],
tags: dumpAttributes(span.attributes),
logs: span.events.map((event) => ({
timestamp: hrTimeToMilliseconds(event.time),
fields: dumpAttributes(event.attributes ?? {}),
})),
};
}),
})),
});
}
async shutdown(): Promise<void> {}
}

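A minimal usage sketch for the new exporter (not part of the commit; the import path and span name are illustrative). It is registered on a tracer provider through a SimpleSpanProcessor, as the otel setup change below does, so that every ended span accumulates in memory until dump() is called:

import { SimpleSpanProcessor } from "@opentelemetry/sdk-trace-base";
import { WebTracerProvider } from "@opentelemetry/sdk-trace-web";
import { RageshakeSpanExporter } from "./RageshakeSpanExporter";

// Collect every ended span in memory so it can be dumped on demand.
const exporter = new RageshakeSpanExporter();
const provider = new WebTracerProvider();
provider.addSpanProcessor(new SimpleSpanProcessor(exporter));
provider.register();

// Record an example span...
provider.getTracer("element-call").startSpan("example").end();

// ...and later, e.g. when a rageshake is submitted, serialize everything
// collected so far as Jaeger-compatible JSON.
const tracesJson: string = exporter.dump();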

@@ -28,6 +28,7 @@ import { logger } from "matrix-js-sdk/src/logger";
import { PosthogSpanExporter } from "../analytics/OtelPosthogExporter";
import { Anonymity } from "../analytics/PosthogAnalytics";
import { Config } from "../config/Config";
import { RageshakeSpanExporter } from "../analytics/RageshakeSpanExporter";
const SERVICE_NAME = "element-call";
@@ -37,21 +38,24 @@ export class ElementCallOpenTelemetry {
private _provider: WebTracerProvider;
private _tracer: Tracer;
private _anonymity: Anonymity;
private _otlpExporter: OTLPTraceExporter;
private otlpExporter: OTLPTraceExporter;
public readonly rageshakeExporter?: RageshakeSpanExporter;
static globalInit(): void {
const config = Config.get();
// We always enable OpenTelemetry in general; the OTLP collector is only
// enabled if a URL is defined (and, in future, possibly other settings).
// The Posthog exporter is always enabled; Posthog reporting is enabled or
// disabled within the Posthog code.
const shouldEnableOtlp = Boolean(Config.get().opentelemetry?.collector_url);
const shouldEnableOtlp = Boolean(config.opentelemetry?.collector_url);
if (!sharedInstance || sharedInstance.isOtlpEnabled !== shouldEnableOtlp) {
logger.info("(Re)starting OpenTelemetry debug reporting");
sharedInstance?.dispose();
sharedInstance = new ElementCallOpenTelemetry(
Config.get().opentelemetry?.collector_url
config.opentelemetry?.collector_url,
config.rageshake?.submit_url
);
}
}
@@ -60,7 +64,10 @@ export class ElementCallOpenTelemetry {
return sharedInstance;
}
constructor(collectorUrl: string | undefined) {
constructor(
collectorUrl: string | undefined,
rageshakeUrl: string | undefined
) {
// This is how we can make Jaeger show a reasonable service in the dropdown on the left.
const providerConfig = {
resource: new Resource({
@@ -71,15 +78,24 @@
if (collectorUrl) {
logger.info("Enabling OTLP collector with URL " + collectorUrl);
this._otlpExporter = new OTLPTraceExporter({
this.otlpExporter = new OTLPTraceExporter({
url: collectorUrl,
});
this._provider.addSpanProcessor(
new SimpleSpanProcessor(this._otlpExporter)
new SimpleSpanProcessor(this.otlpExporter)
);
} else {
logger.info("OTLP collector disabled");
}
if (rageshakeUrl) {
logger.info("Enabling rageshake collector");
this.rageshakeExporter = new RageshakeSpanExporter();
this._provider.addSpanProcessor(
new SimpleSpanProcessor(this.rageshakeExporter)
);
}
const consoleExporter = new ConsoleSpanExporter();
const posthogExporter = new PosthogSpanExporter();
@@ -99,7 +115,7 @@ export class ElementCallOpenTelemetry {
}
public get isOtlpEnabled(): boolean {
return Boolean(this._otlpExporter);
return Boolean(this.otlpExporter);
}
public get tracer(): Tracer {

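For reference, the constructor now reads two settings from the app config: opentelemetry.collector_url toggles the OTLP exporter and rageshake.submit_url toggles the rageshake exporter. A hypothetical excerpt, written as a TypeScript object (the field names come from the diff; the surrounding config shape and URLs are assumptions):

// Illustrative only: enabling both exporters. Either section may be omitted,
// in which case the corresponding exporter stays disabled.
const exampleConfig = {
  opentelemetry: {
    collector_url: "https://otlp.example.com/v1/traces", // OTLP trace exporter
  },
  rageshake: {
    submit_url: "https://rageshake.example.com/api/submit", // rageshake exporter
  },
};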

@@ -25,6 +25,14 @@ import { useClient } from "../ClientContext";
import { InspectorContext } from "../room/GroupCallInspector";
import { useModalTriggerState } from "../Modal";
import { Config } from "../config/Config";
import { ElementCallOpenTelemetry } from "../otel/otel";
const gzip = (text: string): Blob => {
// encode as UTF-8
const buf = new TextEncoder().encode(text);
// compress
return new Blob([pako.gzip(buf)]);
};
interface RageShakeSubmitOptions {
sendLogs: boolean;
@@ -235,14 +243,15 @@ export function useSubmitRageshake(): {
const logs = await getLogsForReport();
for (const entry of logs) {
// encode as UTF-8
let buf = new TextEncoder().encode(entry.lines);
// compress
buf = pako.gzip(buf);
body.append("compressed-log", new Blob([buf]), entry.id);
body.append("compressed-log", gzip(entry.lines), entry.id);
}
body.append(
"file",
gzip(ElementCallOpenTelemetry.instance.rageshakeExporter!.dump()),
"traces.json"
);
if (inspectorState) {
body.append(
"file",