Include OpenTelemetry traces in rageshakes
This commit is contained in:
parent
6999765f39
commit
95eca18207
4 changed files with 148 additions and 21 deletions
|
@ -15,9 +15,12 @@ limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { SpanExporter, ReadableSpan } from "@opentelemetry/sdk-trace-base";
|
import { SpanExporter, ReadableSpan } from "@opentelemetry/sdk-trace-base";
|
||||||
import { ExportResult, ExportResultCode } from "@opentelemetry/core";
|
import {
|
||||||
|
ExportResult,
|
||||||
|
ExportResultCode,
|
||||||
|
hrTimeToMilliseconds,
|
||||||
|
} from "@opentelemetry/core";
|
||||||
import { logger } from "matrix-js-sdk/src/logger";
|
import { logger } from "matrix-js-sdk/src/logger";
|
||||||
import { HrTime } from "@opentelemetry/api";
|
|
||||||
|
|
||||||
import { PosthogAnalytics } from "./PosthogAnalytics";
|
import { PosthogAnalytics } from "./PosthogAnalytics";
|
||||||
|
|
||||||
|
@ -26,10 +29,6 @@ interface PrevCall {
|
||||||
hangupTs: number;
|
hangupTs: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
function hrTimeToMs(time: HrTime): number {
|
|
||||||
return time[0] * 1000 + time[1] * 0.000001;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The maximum time between hanging up and joining the same call that we would
|
* The maximum time between hanging up and joining the same call that we would
|
||||||
* consider a 'rejoin' on the user's part.
|
* consider a 'rejoin' on the user's part.
|
||||||
|
@ -86,14 +85,14 @@ export class PosthogSpanExporter implements SpanExporter {
|
||||||
const prevCall = this.prevCall;
|
const prevCall = this.prevCall;
|
||||||
const newPrevCall = (this.prevCall = {
|
const newPrevCall = (this.prevCall = {
|
||||||
callId: span.attributes["matrix.confId"] as string,
|
callId: span.attributes["matrix.confId"] as string,
|
||||||
hangupTs: hrTimeToMs(span.endTime),
|
hangupTs: hrTimeToMilliseconds(span.endTime),
|
||||||
});
|
});
|
||||||
|
|
||||||
// If the user joined the same call within a short time frame, log this as a
|
// If the user joined the same call within a short time frame, log this as a
|
||||||
// rejoin. This is interesting as a call quality metric, since rejoins may
|
// rejoin. This is interesting as a call quality metric, since rejoins may
|
||||||
// indicate that users had to intervene to make the product work.
|
// indicate that users had to intervene to make the product work.
|
||||||
if (prevCall !== null && newPrevCall.callId === prevCall.callId) {
|
if (prevCall !== null && newPrevCall.callId === prevCall.callId) {
|
||||||
const duration = hrTimeToMs(span.startTime) - prevCall.hangupTs;
|
const duration = hrTimeToMilliseconds(span.startTime) - prevCall.hangupTs;
|
||||||
if (duration <= maxRejoinMs) {
|
if (duration <= maxRejoinMs) {
|
||||||
PosthogAnalytics.instance.trackEvent(
|
PosthogAnalytics.instance.trackEvent(
|
||||||
{
|
{
|
||||||
|
|
103
src/analytics/RageshakeSpanExporter.ts
Normal file
103
src/analytics/RageshakeSpanExporter.ts
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
import { Attributes } from "@opentelemetry/api";
|
||||||
|
import {
|
||||||
|
ExportResult,
|
||||||
|
ExportResultCode,
|
||||||
|
hrTimeToMilliseconds,
|
||||||
|
} from "@opentelemetry/core";
|
||||||
|
import { SpanExporter, ReadableSpan } from "@opentelemetry/sdk-trace-base";
|
||||||
|
|
||||||
|
const dumpAttributes = (attr: Attributes) =>
|
||||||
|
Object.entries(attr).map(([key, value]) => ({
|
||||||
|
key,
|
||||||
|
type: typeof value,
|
||||||
|
value,
|
||||||
|
}));
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Exports spans on demand to the Jaeger JSON format, which can be attached to
|
||||||
|
* rageshakes and loaded into analysis tools like Jaeger and Stalk.
|
||||||
|
*/
|
||||||
|
export class RageshakeSpanExporter implements SpanExporter {
|
||||||
|
private readonly spans: ReadableSpan[] = [];
|
||||||
|
|
||||||
|
export(
|
||||||
|
spans: ReadableSpan[],
|
||||||
|
resultCallback: (result: ExportResult) => void
|
||||||
|
): void {
|
||||||
|
this.spans.push(...spans);
|
||||||
|
resultCallback({ code: ExportResultCode.SUCCESS });
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dumps the spans collected so far as Jaeger-compatible JSON.
|
||||||
|
*/
|
||||||
|
public dump(): string {
|
||||||
|
const traces = new Map<string, ReadableSpan[]>();
|
||||||
|
|
||||||
|
// Organize spans by their trace IDs
|
||||||
|
for (const span of this.spans) {
|
||||||
|
const traceId = span.spanContext().traceId;
|
||||||
|
let trace = traces.get(traceId);
|
||||||
|
|
||||||
|
if (trace === undefined) {
|
||||||
|
trace = [];
|
||||||
|
traces.set(traceId, trace);
|
||||||
|
}
|
||||||
|
|
||||||
|
trace.push(span);
|
||||||
|
}
|
||||||
|
|
||||||
|
const processId = "p1";
|
||||||
|
const processes = {
|
||||||
|
[processId]: {
|
||||||
|
serviceName: "element-call",
|
||||||
|
tags: [],
|
||||||
|
},
|
||||||
|
warnings: null,
|
||||||
|
};
|
||||||
|
|
||||||
|
return JSON.stringify({
|
||||||
|
// Honestly not sure what some of these fields mean, I just know that
|
||||||
|
// they're present in Jaeger JSON exports
|
||||||
|
total: 0,
|
||||||
|
limit: 0,
|
||||||
|
offset: 0,
|
||||||
|
errors: null,
|
||||||
|
data: [...traces.entries()].map(([traceId, spans]) => ({
|
||||||
|
traceID: traceId,
|
||||||
|
warnings: null,
|
||||||
|
processes,
|
||||||
|
spans: spans.map((span) => {
|
||||||
|
const ctx = span.spanContext();
|
||||||
|
|
||||||
|
return {
|
||||||
|
traceID: traceId,
|
||||||
|
spanID: ctx.spanId,
|
||||||
|
operationName: span.name,
|
||||||
|
processID: processId,
|
||||||
|
warnings: null,
|
||||||
|
startTime: hrTimeToMilliseconds(span.startTime),
|
||||||
|
duration: hrTimeToMilliseconds(span.duration),
|
||||||
|
references:
|
||||||
|
span.parentSpanId === undefined
|
||||||
|
? []
|
||||||
|
: [
|
||||||
|
{
|
||||||
|
refType: "CHILD_OF",
|
||||||
|
traceID: traceId,
|
||||||
|
spanID: span.parentSpanId,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
tags: dumpAttributes(span.attributes),
|
||||||
|
logs: span.events.map((event) => ({
|
||||||
|
timestamp: hrTimeToMilliseconds(event.time),
|
||||||
|
fields: dumpAttributes(event.attributes ?? {}),
|
||||||
|
})),
|
||||||
|
};
|
||||||
|
}),
|
||||||
|
})),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async shutdown(): Promise<void> {}
|
||||||
|
}
|
|
@ -28,6 +28,7 @@ import { logger } from "matrix-js-sdk/src/logger";
|
||||||
import { PosthogSpanExporter } from "../analytics/OtelPosthogExporter";
|
import { PosthogSpanExporter } from "../analytics/OtelPosthogExporter";
|
||||||
import { Anonymity } from "../analytics/PosthogAnalytics";
|
import { Anonymity } from "../analytics/PosthogAnalytics";
|
||||||
import { Config } from "../config/Config";
|
import { Config } from "../config/Config";
|
||||||
|
import { RageshakeSpanExporter } from "../analytics/RageshakeSpanExporter";
|
||||||
|
|
||||||
const SERVICE_NAME = "element-call";
|
const SERVICE_NAME = "element-call";
|
||||||
|
|
||||||
|
@ -37,21 +38,24 @@ export class ElementCallOpenTelemetry {
|
||||||
private _provider: WebTracerProvider;
|
private _provider: WebTracerProvider;
|
||||||
private _tracer: Tracer;
|
private _tracer: Tracer;
|
||||||
private _anonymity: Anonymity;
|
private _anonymity: Anonymity;
|
||||||
private _otlpExporter: OTLPTraceExporter;
|
private otlpExporter: OTLPTraceExporter;
|
||||||
|
public readonly rageshakeExporter?: RageshakeSpanExporter;
|
||||||
|
|
||||||
static globalInit(): void {
|
static globalInit(): void {
|
||||||
|
const config = Config.get();
|
||||||
// we always enable opentelemetry in general. We only enable the OTLP
|
// we always enable opentelemetry in general. We only enable the OTLP
|
||||||
// collector if a URL is defined (and in future if another setting is defined)
|
// collector if a URL is defined (and in future if another setting is defined)
|
||||||
// The posthog exporteer is always enabled, posthog reporting is enabled or disabled
|
// The posthog exporteer is always enabled, posthog reporting is enabled or disabled
|
||||||
// within the posthog code.
|
// within the posthog code.
|
||||||
const shouldEnableOtlp = Boolean(Config.get().opentelemetry?.collector_url);
|
const shouldEnableOtlp = Boolean(config.opentelemetry?.collector_url);
|
||||||
|
|
||||||
if (!sharedInstance || sharedInstance.isOtlpEnabled !== shouldEnableOtlp) {
|
if (!sharedInstance || sharedInstance.isOtlpEnabled !== shouldEnableOtlp) {
|
||||||
logger.info("(Re)starting OpenTelemetry debug reporting");
|
logger.info("(Re)starting OpenTelemetry debug reporting");
|
||||||
sharedInstance?.dispose();
|
sharedInstance?.dispose();
|
||||||
|
|
||||||
sharedInstance = new ElementCallOpenTelemetry(
|
sharedInstance = new ElementCallOpenTelemetry(
|
||||||
Config.get().opentelemetry?.collector_url
|
config.opentelemetry?.collector_url,
|
||||||
|
config.rageshake?.submit_url
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -60,7 +64,10 @@ export class ElementCallOpenTelemetry {
|
||||||
return sharedInstance;
|
return sharedInstance;
|
||||||
}
|
}
|
||||||
|
|
||||||
constructor(collectorUrl: string | undefined) {
|
constructor(
|
||||||
|
collectorUrl: string | undefined,
|
||||||
|
rageshakeUrl: string | undefined
|
||||||
|
) {
|
||||||
// This is how we can make Jaeger show a reaonsable service in the dropdown on the left.
|
// This is how we can make Jaeger show a reaonsable service in the dropdown on the left.
|
||||||
const providerConfig = {
|
const providerConfig = {
|
||||||
resource: new Resource({
|
resource: new Resource({
|
||||||
|
@ -71,15 +78,24 @@ export class ElementCallOpenTelemetry {
|
||||||
|
|
||||||
if (collectorUrl) {
|
if (collectorUrl) {
|
||||||
logger.info("Enabling OTLP collector with URL " + collectorUrl);
|
logger.info("Enabling OTLP collector with URL " + collectorUrl);
|
||||||
this._otlpExporter = new OTLPTraceExporter({
|
this.otlpExporter = new OTLPTraceExporter({
|
||||||
url: collectorUrl,
|
url: collectorUrl,
|
||||||
});
|
});
|
||||||
this._provider.addSpanProcessor(
|
this._provider.addSpanProcessor(
|
||||||
new SimpleSpanProcessor(this._otlpExporter)
|
new SimpleSpanProcessor(this.otlpExporter)
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
logger.info("OTLP collector disabled");
|
logger.info("OTLP collector disabled");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rageshakeUrl) {
|
||||||
|
logger.info("Enabling rageshake collector");
|
||||||
|
this.rageshakeExporter = new RageshakeSpanExporter();
|
||||||
|
this._provider.addSpanProcessor(
|
||||||
|
new SimpleSpanProcessor(this.rageshakeExporter)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
const consoleExporter = new ConsoleSpanExporter();
|
const consoleExporter = new ConsoleSpanExporter();
|
||||||
const posthogExporter = new PosthogSpanExporter();
|
const posthogExporter = new PosthogSpanExporter();
|
||||||
|
|
||||||
|
@ -99,7 +115,7 @@ export class ElementCallOpenTelemetry {
|
||||||
}
|
}
|
||||||
|
|
||||||
public get isOtlpEnabled(): boolean {
|
public get isOtlpEnabled(): boolean {
|
||||||
return Boolean(this._otlpExporter);
|
return Boolean(this.otlpExporter);
|
||||||
}
|
}
|
||||||
|
|
||||||
public get tracer(): Tracer {
|
public get tracer(): Tracer {
|
||||||
|
|
|
@ -25,6 +25,14 @@ import { useClient } from "../ClientContext";
|
||||||
import { InspectorContext } from "../room/GroupCallInspector";
|
import { InspectorContext } from "../room/GroupCallInspector";
|
||||||
import { useModalTriggerState } from "../Modal";
|
import { useModalTriggerState } from "../Modal";
|
||||||
import { Config } from "../config/Config";
|
import { Config } from "../config/Config";
|
||||||
|
import { ElementCallOpenTelemetry } from "../otel/otel";
|
||||||
|
|
||||||
|
const gzip = (text: string): Blob => {
|
||||||
|
// encode as UTF-8
|
||||||
|
const buf = new TextEncoder().encode(text);
|
||||||
|
// compress
|
||||||
|
return new Blob([pako.gzip(buf)]);
|
||||||
|
};
|
||||||
|
|
||||||
interface RageShakeSubmitOptions {
|
interface RageShakeSubmitOptions {
|
||||||
sendLogs: boolean;
|
sendLogs: boolean;
|
||||||
|
@ -235,14 +243,15 @@ export function useSubmitRageshake(): {
|
||||||
const logs = await getLogsForReport();
|
const logs = await getLogsForReport();
|
||||||
|
|
||||||
for (const entry of logs) {
|
for (const entry of logs) {
|
||||||
// encode as UTF-8
|
body.append("compressed-log", gzip(entry.lines), entry.id);
|
||||||
let buf = new TextEncoder().encode(entry.lines);
|
|
||||||
// compress
|
|
||||||
buf = pako.gzip(buf);
|
|
||||||
|
|
||||||
body.append("compressed-log", new Blob([buf]), entry.id);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
body.append(
|
||||||
|
"file",
|
||||||
|
gzip(ElementCallOpenTelemetry.instance.rageshakeExporter!.dump()),
|
||||||
|
"traces.json"
|
||||||
|
);
|
||||||
|
|
||||||
if (inspectorState) {
|
if (inspectorState) {
|
||||||
body.append(
|
body.append(
|
||||||
"file",
|
"file",
|
||||||
|
|
Loading…
Add table
Reference in a new issue