From fd430687ec1431be6c3af1b7420278b636c36e59 Mon Sep 17 00:00:00 2001
From: WillTaylorDev
Date: Fri, 11 Oct 2024 08:47:03 -0400
Subject: [PATCH] WC-2695 Add better visibility for router-worker (#6941)

This commit instruments the router worker with analytics such as request
time, colo metadata, error, etc., in order for us to have better
visibility into the router worker.

Request time is measured in a `finally` block, so the user worker
dispatch is awaited: otherwise the timer would stop, and a rejection
would go unrecorded, before the user worker has responded. Sentry
colo/metal tags are only set when the COLO_METADATA binding is present,
and the analytics index falls back to the recorded hostname when the
caller does not pass one.

These changes were tested using gradual rollouts with a 0% version and a
Cloudflare-Workers-Version-Overrides header.
---
 .changeset/poor-shoes-tickle.md               |  5 ++
 .../router-worker/src/analytics.ts            | 71 +++++++++++++++++++
 .../workers-shared/router-worker/src/index.ts | 47 +++++++++++-
 .../router-worker/src/performance.ts          | 16 +++++
 .../workers-shared/router-worker/src/types.ts | 25 +++++++
 .../router-worker/wrangler.toml               |  9 ++-
 6 files changed, 171 insertions(+), 3 deletions(-)
 create mode 100644 .changeset/poor-shoes-tickle.md
 create mode 100644 packages/workers-shared/router-worker/src/analytics.ts
 create mode 100644 packages/workers-shared/router-worker/src/performance.ts
 create mode 100644 packages/workers-shared/router-worker/src/types.ts

diff --git a/.changeset/poor-shoes-tickle.md b/.changeset/poor-shoes-tickle.md
new file mode 100644
index 000000000000..3a5e9e317c4e
--- /dev/null
+++ b/.changeset/poor-shoes-tickle.md
@@ -0,0 +1,5 @@
+---
+"@cloudflare/workers-shared": minor
+---
+
+feat: Add observability to router-worker
diff --git a/packages/workers-shared/router-worker/src/analytics.ts b/packages/workers-shared/router-worker/src/analytics.ts
new file mode 100644
index 000000000000..fb0135db0811
--- /dev/null
+++ b/packages/workers-shared/router-worker/src/analytics.ts
@@ -0,0 +1,71 @@
+import type { Environment, ReadyAnalytics } from "./types";
+
+// This will allow us to make breaking changes to the analytic schema
+const VERSION = 1;
+
+export enum DISPATCH_TYPE {
+	ASSETS = "asset",
+	WORKER = "worker",
+}
+
+// When adding new columns please update the schema
+type Data = {
+	// -- Doubles --
+	// double1 - The time it takes for the whole request to complete in milliseconds
+	requestTime?: number;
+	// double2 - Colo ID
+	coloId?: number;
+	// double3 - Metal ID
+	metalId?: number;
+	// double4 - Colo tier (e.g. tier 1, tier 2, tier 3)
+	coloTier?: number;
+
+	// -- Blobs --
+	// blob1 - Hostname of the request
+	hostname?: string;
+	// blob2 - Dispatch type - what kind of thing did we dispatch
+	dispatchtype?: DISPATCH_TYPE;
+	// blob3 - Error message
+	error?: string;
+	// blob4 - The current version UUID of router-worker
+	version?: string;
+	// blob5 - Region of the colo (e.g. WEUR)
+	coloRegion?: string;
+};
+
+export class Analytics {
+	private data: Data = {};
+
+	setData(newData: Partial<Data>) {
+		this.data = { ...this.data, ...newData };
+	}
+
+	getData(key: keyof Data) {
+		return this.data[key];
+	}
+
+	write(env: Environment, readyAnalytics?: ReadyAnalytics, hostname?: string) {
+		if (!readyAnalytics) {
+			return;
+		}
+
+		readyAnalytics.logEvent({
+			version: VERSION,
+			accountId: 0, // TODO: need to plumb through
+			indexId: hostname ?? this.data.hostname, // fall back to the recorded hostname
+			doubles: [
+				this.data.requestTime ?? -1, // double1
+				this.data.coloId ?? -1, // double2
+				this.data.metalId ?? -1, // double3
+				this.data.coloTier ?? -1, // double4
+			],
+			blobs: [
+				this.data.hostname?.substring(0, 256), // blob1 - trim to 256 characters
+				this.data.dispatchtype, // blob2
+				this.data.error?.substring(0, 256), // blob3 - trim to 256 characters
+				this.data.version, // blob4
+				this.data.coloRegion, // blob5
+			],
+		});
+	}
+}
diff --git a/packages/workers-shared/router-worker/src/index.ts b/packages/workers-shared/router-worker/src/index.ts
index 91ddba0ec4b7..71f638a94f63 100644
--- a/packages/workers-shared/router-worker/src/index.ts
+++ b/packages/workers-shared/router-worker/src/index.ts
@@ -1,6 +1,14 @@
 import { setupSentry } from "../../utils/sentry";
+import { Analytics, DISPATCH_TYPE } from "./analytics";
+import { PerformanceTimer } from "./performance";
 import type AssetWorker from "../../asset-worker/src/index";
 import type { RoutingConfig } from "../../utils/types";
+import type {
+	ColoMetadata,
+	Environment,
+	ReadyAnalytics,
+	UnsafePerformanceTimer,
+} from "./types";
 
 interface Env {
 	ASSET_WORKER: Service<AssetWorker>;
@@ -8,6 +16,11 @@ interface Env {
 	CONFIG: RoutingConfig;
 
 	SENTRY_DSN: string;
+	ENVIRONMENT: Environment;
+	ANALYTICS: ReadyAnalytics;
+	COLO_METADATA: ColoMetadata;
+	UNSAFE_PERFORMANCE: UnsafePerformanceTimer;
+	VERSION_METADATA: WorkerVersionMetadata;
 
 	SENTRY_ACCESS_CLIENT_ID: string;
 	SENTRY_ACCESS_CLIENT_SECRET: string;
@@ -16,7 +29,9 @@ interface Env {
 export default {
 	async fetch(request: Request, env: Env, ctx: ExecutionContext) {
 		let sentry: ReturnType<typeof setupSentry> | undefined;
-		const maybeSecondRequest = request.clone();
+		const analytics = new Analytics();
+		const performance = new PerformanceTimer(env.UNSAFE_PERFORMANCE);
+		const startTimeMs = performance.now();
 
 		try {
 			sentry = setupSentry(
@@ -27,21 +42,50 @@ export default {
 				env.SENTRY_ACCESS_CLIENT_SECRET
 			);
 
+			const url = new URL(request.url);
+			sentry?.setUser({ username: url.hostname });
+			if (sentry && env.COLO_METADATA) {
+				sentry.setTag("colo", env.COLO_METADATA.coloId);
+				sentry.setTag("metal", env.COLO_METADATA.metalId);
+			}
+
+			if (env.COLO_METADATA && env.VERSION_METADATA) {
+				analytics.setData({
+					coloId: env.COLO_METADATA.coloId,
+					metalId: env.COLO_METADATA.metalId,
+					coloTier: env.COLO_METADATA.coloTier,
+					coloRegion: env.COLO_METADATA.coloRegion,
+					hostname: url.hostname,
+					version: env.VERSION_METADATA.id,
+				});
+			}
+
+			const maybeSecondRequest = request.clone();
 			if (env.CONFIG.has_user_worker) {
 				if (await env.ASSET_WORKER.unstable_canFetch(request)) {
+					analytics.setData({ dispatchtype: DISPATCH_TYPE.ASSETS });
 					return await env.ASSET_WORKER.fetch(maybeSecondRequest);
 				} else {
-					return env.USER_WORKER.fetch(maybeSecondRequest);
+					analytics.setData({ dispatchtype: DISPATCH_TYPE.WORKER });
+					return await env.USER_WORKER.fetch(maybeSecondRequest);
 				}
 			}
 
+			analytics.setData({ dispatchtype: DISPATCH_TYPE.ASSETS });
 			return await env.ASSET_WORKER.fetch(request);
 		} catch (err) {
+			if (err instanceof Error) {
+				analytics.setData({ error: err.message });
+			}
+
 			// Log to Sentry if we can
 			if (sentry) {
 				sentry.captureException(err);
 			}
 			throw err;
+		} finally {
+			analytics.setData({ requestTime: performance.now() - startTimeMs });
+			analytics.write(env.ENVIRONMENT, env.ANALYTICS);
 		}
 	},
 };
diff --git a/packages/workers-shared/router-worker/src/performance.ts b/packages/workers-shared/router-worker/src/performance.ts
new file mode 100644
index 000000000000..1b61bd8db0de
--- /dev/null
+++ b/packages/workers-shared/router-worker/src/performance.ts
@@ -0,0 +1,16 @@
+import type { UnsafePerformanceTimer } from "./types";
+
+export class PerformanceTimer {
+	private performanceTimer;
+
+	constructor(performanceTimer?: UnsafePerformanceTimer) {
+		this.performanceTimer = performanceTimer;
+	}
+
+	now() {
+		if (this.performanceTimer) {
+			return this.performanceTimer.timeOrigin + this.performanceTimer.now();
+		}
+		return Date.now();
+	}
+}
diff --git a/packages/workers-shared/router-worker/src/types.ts b/packages/workers-shared/router-worker/src/types.ts
new file mode 100644
index 000000000000..c0bf3f155a4f
--- /dev/null
+++ b/packages/workers-shared/router-worker/src/types.ts
@@ -0,0 +1,25 @@
+export type Environment = "production" | "staging";
+
+export interface ReadyAnalytics {
+	logEvent: (e: ReadyAnalyticsEvent) => void;
+}
+
+export interface ColoMetadata {
+	metalId: number;
+	coloId: number;
+	coloRegion: string;
+	coloTier: number;
+}
+
+export interface UnsafePerformanceTimer {
+	readonly timeOrigin: number;
+	now: () => number;
+}
+
+export interface ReadyAnalyticsEvent {
+	accountId?: number;
+	indexId?: string;
+	version?: number;
+	doubles?: (number | undefined)[];
+	blobs?: (string | undefined)[];
+}
diff --git a/packages/workers-shared/router-worker/wrangler.toml b/packages/workers-shared/router-worker/wrangler.toml
index ea58a34feee0..d96e0418f1e0 100644
--- a/packages/workers-shared/router-worker/wrangler.toml
+++ b/packages/workers-shared/router-worker/wrangler.toml
@@ -13,6 +13,9 @@ workers_dev = false
 main = "src/index.ts"
 compatibility_date = "2024-07-31"
 
+[version_metadata]
+binding = "VERSION_METADATA"
+
 [[unsafe.bindings]]
 name = "CONFIG"
 type = "param"
@@ -29,4 +32,8 @@ type = "origin"
 
 [unsafe.metadata.build_options]
 stable_id = "cloudflare/cf_router_worker"
-networks = ["cf","jdc"]
\ No newline at end of file
+networks = ["cf","jdc"]
+
+[[unsafe.bindings]]
+name = "workers-router-worker"
+type = "internal_capability_grants"
\ No newline at end of file