Include OpenTelemetry traces in rageshakes
This commit is contained in:
parent
6999765f39
commit
95eca18207
4 changed files with 148 additions and 21 deletions
|
@ -15,9 +15,12 @@ limitations under the License.
|
|||
*/
|
||||
|
||||
import { SpanExporter, ReadableSpan } from "@opentelemetry/sdk-trace-base";
|
||||
import { ExportResult, ExportResultCode } from "@opentelemetry/core";
|
||||
import {
|
||||
ExportResult,
|
||||
ExportResultCode,
|
||||
hrTimeToMilliseconds,
|
||||
} from "@opentelemetry/core";
|
||||
import { logger } from "matrix-js-sdk/src/logger";
|
||||
import { HrTime } from "@opentelemetry/api";
|
||||
|
||||
import { PosthogAnalytics } from "./PosthogAnalytics";
|
||||
|
||||
|
@ -26,10 +29,6 @@ interface PrevCall {
|
|||
hangupTs: number;
|
||||
}
|
||||
|
||||
function hrTimeToMs(time: HrTime): number {
|
||||
return time[0] * 1000 + time[1] * 0.000001;
|
||||
}
|
||||
|
||||
/**
|
||||
* The maximum time between hanging up and joining the same call that we would
|
||||
* consider a 'rejoin' on the user's part.
|
||||
|
@ -86,14 +85,14 @@ export class PosthogSpanExporter implements SpanExporter {
|
|||
const prevCall = this.prevCall;
|
||||
const newPrevCall = (this.prevCall = {
|
||||
callId: span.attributes["matrix.confId"] as string,
|
||||
hangupTs: hrTimeToMs(span.endTime),
|
||||
hangupTs: hrTimeToMilliseconds(span.endTime),
|
||||
});
|
||||
|
||||
// If the user joined the same call within a short time frame, log this as a
|
||||
// rejoin. This is interesting as a call quality metric, since rejoins may
|
||||
// indicate that users had to intervene to make the product work.
|
||||
if (prevCall !== null && newPrevCall.callId === prevCall.callId) {
|
||||
const duration = hrTimeToMs(span.startTime) - prevCall.hangupTs;
|
||||
const duration = hrTimeToMilliseconds(span.startTime) - prevCall.hangupTs;
|
||||
if (duration <= maxRejoinMs) {
|
||||
PosthogAnalytics.instance.trackEvent(
|
||||
{
|
||||
|
|
103
src/analytics/RageshakeSpanExporter.ts
Normal file
103
src/analytics/RageshakeSpanExporter.ts
Normal file
|
@ -0,0 +1,103 @@
|
|||
import { Attributes } from "@opentelemetry/api";
|
||||
import {
|
||||
ExportResult,
|
||||
ExportResultCode,
|
||||
hrTimeToMilliseconds,
|
||||
} from "@opentelemetry/core";
|
||||
import { SpanExporter, ReadableSpan } from "@opentelemetry/sdk-trace-base";
|
||||
|
||||
const dumpAttributes = (attr: Attributes) =>
|
||||
Object.entries(attr).map(([key, value]) => ({
|
||||
key,
|
||||
type: typeof value,
|
||||
value,
|
||||
}));
|
||||
|
||||
/**
|
||||
* Exports spans on demand to the Jaeger JSON format, which can be attached to
|
||||
* rageshakes and loaded into analysis tools like Jaeger and Stalk.
|
||||
*/
|
||||
export class RageshakeSpanExporter implements SpanExporter {
|
||||
private readonly spans: ReadableSpan[] = [];
|
||||
|
||||
export(
|
||||
spans: ReadableSpan[],
|
||||
resultCallback: (result: ExportResult) => void
|
||||
): void {
|
||||
this.spans.push(...spans);
|
||||
resultCallback({ code: ExportResultCode.SUCCESS });
|
||||
}
|
||||
|
||||
/**
|
||||
* Dumps the spans collected so far as Jaeger-compatible JSON.
|
||||
*/
|
||||
public dump(): string {
|
||||
const traces = new Map<string, ReadableSpan[]>();
|
||||
|
||||
// Organize spans by their trace IDs
|
||||
for (const span of this.spans) {
|
||||
const traceId = span.spanContext().traceId;
|
||||
let trace = traces.get(traceId);
|
||||
|
||||
if (trace === undefined) {
|
||||
trace = [];
|
||||
traces.set(traceId, trace);
|
||||
}
|
||||
|
||||
trace.push(span);
|
||||
}
|
||||
|
||||
const processId = "p1";
|
||||
const processes = {
|
||||
[processId]: {
|
||||
serviceName: "element-call",
|
||||
tags: [],
|
||||
},
|
||||
warnings: null,
|
||||
};
|
||||
|
||||
return JSON.stringify({
|
||||
// Honestly not sure what some of these fields mean, I just know that
|
||||
// they're present in Jaeger JSON exports
|
||||
total: 0,
|
||||
limit: 0,
|
||||
offset: 0,
|
||||
errors: null,
|
||||
data: [...traces.entries()].map(([traceId, spans]) => ({
|
||||
traceID: traceId,
|
||||
warnings: null,
|
||||
processes,
|
||||
spans: spans.map((span) => {
|
||||
const ctx = span.spanContext();
|
||||
|
||||
return {
|
||||
traceID: traceId,
|
||||
spanID: ctx.spanId,
|
||||
operationName: span.name,
|
||||
processID: processId,
|
||||
warnings: null,
|
||||
startTime: hrTimeToMilliseconds(span.startTime),
|
||||
duration: hrTimeToMilliseconds(span.duration),
|
||||
references:
|
||||
span.parentSpanId === undefined
|
||||
? []
|
||||
: [
|
||||
{
|
||||
refType: "CHILD_OF",
|
||||
traceID: traceId,
|
||||
spanID: span.parentSpanId,
|
||||
},
|
||||
],
|
||||
tags: dumpAttributes(span.attributes),
|
||||
logs: span.events.map((event) => ({
|
||||
timestamp: hrTimeToMilliseconds(event.time),
|
||||
fields: dumpAttributes(event.attributes ?? {}),
|
||||
})),
|
||||
};
|
||||
}),
|
||||
})),
|
||||
});
|
||||
}
|
||||
|
||||
async shutdown(): Promise<void> {}
|
||||
}
|
|
@ -28,6 +28,7 @@ import { logger } from "matrix-js-sdk/src/logger";
|
|||
import { PosthogSpanExporter } from "../analytics/OtelPosthogExporter";
|
||||
import { Anonymity } from "../analytics/PosthogAnalytics";
|
||||
import { Config } from "../config/Config";
|
||||
import { RageshakeSpanExporter } from "../analytics/RageshakeSpanExporter";
|
||||
|
||||
const SERVICE_NAME = "element-call";
|
||||
|
||||
|
@ -37,21 +38,24 @@ export class ElementCallOpenTelemetry {
|
|||
private _provider: WebTracerProvider;
|
||||
private _tracer: Tracer;
|
||||
private _anonymity: Anonymity;
|
||||
private _otlpExporter: OTLPTraceExporter;
|
||||
private otlpExporter: OTLPTraceExporter;
|
||||
public readonly rageshakeExporter?: RageshakeSpanExporter;
|
||||
|
||||
static globalInit(): void {
|
||||
const config = Config.get();
|
||||
// we always enable opentelemetry in general. We only enable the OTLP
|
||||
// collector if a URL is defined (and in future if another setting is defined)
|
||||
// The posthog exporteer is always enabled, posthog reporting is enabled or disabled
|
||||
// within the posthog code.
|
||||
const shouldEnableOtlp = Boolean(Config.get().opentelemetry?.collector_url);
|
||||
const shouldEnableOtlp = Boolean(config.opentelemetry?.collector_url);
|
||||
|
||||
if (!sharedInstance || sharedInstance.isOtlpEnabled !== shouldEnableOtlp) {
|
||||
logger.info("(Re)starting OpenTelemetry debug reporting");
|
||||
sharedInstance?.dispose();
|
||||
|
||||
sharedInstance = new ElementCallOpenTelemetry(
|
||||
Config.get().opentelemetry?.collector_url
|
||||
config.opentelemetry?.collector_url,
|
||||
config.rageshake?.submit_url
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -60,7 +64,10 @@ export class ElementCallOpenTelemetry {
|
|||
return sharedInstance;
|
||||
}
|
||||
|
||||
constructor(collectorUrl: string | undefined) {
|
||||
constructor(
|
||||
collectorUrl: string | undefined,
|
||||
rageshakeUrl: string | undefined
|
||||
) {
|
||||
// This is how we can make Jaeger show a reaonsable service in the dropdown on the left.
|
||||
const providerConfig = {
|
||||
resource: new Resource({
|
||||
|
@ -71,15 +78,24 @@ export class ElementCallOpenTelemetry {
|
|||
|
||||
if (collectorUrl) {
|
||||
logger.info("Enabling OTLP collector with URL " + collectorUrl);
|
||||
this._otlpExporter = new OTLPTraceExporter({
|
||||
this.otlpExporter = new OTLPTraceExporter({
|
||||
url: collectorUrl,
|
||||
});
|
||||
this._provider.addSpanProcessor(
|
||||
new SimpleSpanProcessor(this._otlpExporter)
|
||||
new SimpleSpanProcessor(this.otlpExporter)
|
||||
);
|
||||
} else {
|
||||
logger.info("OTLP collector disabled");
|
||||
}
|
||||
|
||||
if (rageshakeUrl) {
|
||||
logger.info("Enabling rageshake collector");
|
||||
this.rageshakeExporter = new RageshakeSpanExporter();
|
||||
this._provider.addSpanProcessor(
|
||||
new SimpleSpanProcessor(this.rageshakeExporter)
|
||||
);
|
||||
}
|
||||
|
||||
const consoleExporter = new ConsoleSpanExporter();
|
||||
const posthogExporter = new PosthogSpanExporter();
|
||||
|
||||
|
@ -99,7 +115,7 @@ export class ElementCallOpenTelemetry {
|
|||
}
|
||||
|
||||
public get isOtlpEnabled(): boolean {
|
||||
return Boolean(this._otlpExporter);
|
||||
return Boolean(this.otlpExporter);
|
||||
}
|
||||
|
||||
public get tracer(): Tracer {
|
||||
|
|
|
@ -25,6 +25,14 @@ import { useClient } from "../ClientContext";
|
|||
import { InspectorContext } from "../room/GroupCallInspector";
|
||||
import { useModalTriggerState } from "../Modal";
|
||||
import { Config } from "../config/Config";
|
||||
import { ElementCallOpenTelemetry } from "../otel/otel";
|
||||
|
||||
const gzip = (text: string): Blob => {
|
||||
// encode as UTF-8
|
||||
const buf = new TextEncoder().encode(text);
|
||||
// compress
|
||||
return new Blob([pako.gzip(buf)]);
|
||||
};
|
||||
|
||||
interface RageShakeSubmitOptions {
|
||||
sendLogs: boolean;
|
||||
|
@ -235,14 +243,15 @@ export function useSubmitRageshake(): {
|
|||
const logs = await getLogsForReport();
|
||||
|
||||
for (const entry of logs) {
|
||||
// encode as UTF-8
|
||||
let buf = new TextEncoder().encode(entry.lines);
|
||||
// compress
|
||||
buf = pako.gzip(buf);
|
||||
|
||||
body.append("compressed-log", new Blob([buf]), entry.id);
|
||||
body.append("compressed-log", gzip(entry.lines), entry.id);
|
||||
}
|
||||
|
||||
body.append(
|
||||
"file",
|
||||
gzip(ElementCallOpenTelemetry.instance.rageshakeExporter!.dump()),
|
||||
"traces.json"
|
||||
);
|
||||
|
||||
if (inspectorState) {
|
||||
body.append(
|
||||
"file",
|
||||
|
|
Loading…
Add table
Reference in a new issue