Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add code for interpolation search for asset manifest lookup #8044

Merged
merged 1 commit into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/stupid-turtles-hammer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
GregBrimble marked this conversation as resolved.
Show resolved Hide resolved
"@cloudflare/workers-shared": minor
---

chore: Adds analytics and code (zero-percent gated) for a new asset manifest search algorithm
13 changes: 13 additions & 0 deletions packages/workers-shared/asset-worker/crypto-polyfill.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { afterAll, beforeAll } from "vitest";

// Can be deleted once Node.js (where these tests run) version is bumped to one which includes this global :)

// Test setup hook: before the tests run, expose the module returned by
// `require("crypto")` as `globalThis.crypto`.
// NOTE(review): presumably this is so code under test that reads the global
// `crypto` (e.g. `crypto.subtle`) works on this Node.js version — confirm.
beforeAll(() => {
// @ts-expect-error will go away once Node.js is bumped
globalThis.crypto = require("crypto");
});

// Test teardown hook: remove the `globalThis.crypto` property installed above
// once the tests have finished.
afterAll(() => {
// @ts-expect-error will go away once Node.js is bumped
delete globalThis.crypto;
});
Binary file not shown.
91 changes: 87 additions & 4 deletions packages/workers-shared/asset-worker/src/assets-manifest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,26 @@ export class AssetsManifest {
this.data = data;
}

/**
 * Looks up `pathname` in the manifest using binary search.
 *
 * The pathname is hashed with `hashPath`, and the hash is searched for in the
 * manifest bytes that follow the first `HEADER_SIZE` bytes of `this.data`.
 *
 * @param pathname - The path to look up.
 * @returns `contentHashToKey(entry)` for the matching entry, or `null` when
 * no entry matches.
 */
async getWithBinarySearch(pathname: string) {
	const pathHash = await hashPath(pathname);
	const entry = binarySearch(
		new Uint8Array(this.data, HEADER_SIZE),
		pathHash
	);
	return entry ? contentHashToKey(entry) : null;
}

/**
 * Looks up `pathname` in the manifest using interpolation search.
 *
 * @param pathname - The path to look up.
 * @returns `contentHashToKey(entry)` for the matching entry, or `null` when
 * no entry matches.
 */
async getWithInterpolationSearch(pathname: string) {
	// Hash first, then build the view over everything after the header.
	const hashedPath = await hashPath(pathname);
	const manifestEntries = new Uint8Array(this.data, HEADER_SIZE);
	const match = interpolationSearch(manifestEntries, hashedPath);
	if (!match) {
		return null;
	}
	return contentHashToKey(match);
}
}

const hashPath = async (path: string) => {
export const hashPath = async (path: string) => {
const encoder = new TextEncoder();
const data = encoder.encode(path);
const hashBuffer = await crypto.subtle.digest(
Expand All @@ -32,7 +41,7 @@ const hashPath = async (path: string) => {
return new Uint8Array(hashBuffer, 0, PATH_HASH_SIZE);
};

const binarySearch = (
export const binarySearch = (
arr: Uint8Array,
searchValue: Uint8Array
): Uint8Array | false => {
Expand Down Expand Up @@ -67,7 +76,81 @@ const binarySearch = (
}
};

const compare = (a: Uint8Array, b: Uint8Array) => {
/**
 * Interprets the first 16 bytes of `uint8Array` (starting at its own
 * `byteOffset` within the underlying buffer) as one unsigned big-endian
 * 128-bit integer.
 *
 * @param uint8Array - View whose first 16 bytes are converted.
 * @returns The value as a `bigint`.
 */
const uint8ArrayToNumber = (uint8Array: Uint8Array) => {
	const { buffer, byteOffset } = uint8Array;
	const view = new DataView(buffer, byteOffset);
	// Two big-endian 64-bit reads: the most significant half first.
	const upperHalf = view.getBigUint64(0);
	const lowerHalf = view.getBigUint64(8);
	// The shifted upper half has 64 zero low bits, so OR-ing equals adding.
	return (upperHalf << 64n) | lowerHalf;
};

/**
 * Finds the entry whose path hash equals `searchValue` using interpolation
 * search.
 *
 * `arr` is read as consecutive `ENTRY_SIZE`-byte entries, each starting with a
 * `PATH_HASH_SIZE`-byte path hash. The search relies on the entries being
 * sorted in ascending order of that hash. Trailing bytes that do not make up a
 * whole entry are ignored.
 *
 * NOTE(review): hashes are turned into numbers with `uint8ArrayToNumber`,
 * which reads 16 bytes — this presumes `PATH_HASH_SIZE` is 16; confirm.
 *
 * @param arr - The manifest entries (without any header).
 * @param searchValue - The path hash to look for.
 * @returns A view over the matching `ENTRY_SIZE`-byte entry, or `false` when
 * no entry matches.
 * @throws TypeError if `arr` holds at least one entry and `searchValue` is not
 * `PATH_HASH_SIZE` bytes long.
 */
export const interpolationSearch = (
	arr: Uint8Array,
	searchValue: Uint8Array
): Uint8Array | false => {
	// Whole entries only: a fractional count would make BigInt() throw below.
	const entryCount = Math.floor(arr.byteLength / ENTRY_SIZE);
	if (entryCount === 0) {
		return false;
	}
	// Every stored hash is PATH_HASH_SIZE bytes, so validate once, before the
	// search value is converted to a number.
	if (searchValue.byteLength !== PATH_HASH_SIZE) {
		throw new TypeError(
			"Search value and current value are of different lengths"
		);
	}

	const hashAt = (index: number) =>
		new Uint8Array(
			arr.buffer,
			arr.byteOffset + index * ENTRY_SIZE,
			PATH_HASH_SIZE
		);

	const searchValueNumber = uint8ArrayToNumber(searchValue);
	let low = 0;
	let high = entryCount - 1;

	while (low <= high) {
		const lowNumber = uint8ArrayToNumber(hashAt(low));
		const highNumber = uint8ArrayToNumber(hashAt(high));

		// A value outside the current bounds cannot be present. Bailing out here
		// also guarantees the probe index below stays within [low, high].
		if (searchValueNumber < lowNumber || searchValueNumber > highNumber) {
			return false;
		}

		// When both bounds hold the same hash (e.g. a single remaining entry)
		// there is nothing to interpolate, and dividing would throw
		// "RangeError: Division by zero".
		const span = highNumber - lowNumber;
		const mid =
			span === 0n
				? low
				: low +
					Number(
						(BigInt(high - low) * (searchValueNumber - lowNumber)) / span
					);

		const cmp = compare(hashAt(mid), searchValue);
		if (cmp === 0) {
			return new Uint8Array(
				arr.buffer,
				arr.byteOffset + mid * ENTRY_SIZE,
				ENTRY_SIZE
			);
		} else if (cmp < 0) {
			low = mid + 1;
		} else {
			high = mid - 1;
		}
	}
	return false;
};

export const compare = (a: Uint8Array, b: Uint8Array) => {
if (a.byteLength < b.byteLength) {
return -1;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import type { ReadyAnalytics } from "./types";

// Version of the analytics schema. Bumping it allows breaking changes to the
// schema to be told apart from events written with the previous layout.
const VERSION = 1;

// Shape of the values collected for one experiment analytics event. Every
// field is optional. The comments below record which column (index, doubleN,
// blobN) each field corresponds to.
// When adding new columns please update the schema
type Data = {
// -- Indexes --
accountId?: number;
experimentName?: string;

// -- Doubles --
// double1 - The time it takes to read the manifest in milliseconds
manifestReadTime?: number;

// -- Blobs --
// blob1 - Manifest read method
manifestReadMethod?: string;
};

/**
 * Collects experiment data points and emits them as a single analytics event.
 *
 * When no `ReadyAnalytics` instance is supplied, data can still be set and
 * read, but `write()` does nothing.
 */
export class ExperimentAnalytics {
	private data: Data = {};
	private readyAnalytics?: ReadyAnalytics;

	/**
	 * @param readyAnalytics - Optional sink that `write()` logs the event to.
	 */
	constructor(readyAnalytics?: ReadyAnalytics) {
		this.readyAnalytics = readyAnalytics;
	}

	/**
	 * Merges `newData` over the data collected so far; keys present in
	 * `newData` replace existing ones.
	 */
	setData(newData: Partial<Data>) {
		const merged: Data = { ...this.data, ...newData };
		this.data = merged;
	}

	/**
	 * Returns the currently stored value for `key`, if any.
	 */
	getData(key: keyof Data) {
		const collected = this.data;
		return collected[key];
	}

	/**
	 * Logs the collected data as one event. A missing `manifestReadTime` is
	 * reported as -1.
	 */
	write() {
		const sink = this.readyAnalytics;
		if (!sink) {
			return;
		}

		const { accountId, experimentName, manifestReadTime, manifestReadMethod } =
			this.data;

		sink.logEvent({
			version: VERSION,
			accountId,
			indexId: experimentName,
			doubles: [
				manifestReadTime ?? -1, // double1
			],
			blobs: [
				manifestReadMethod, // blob1
			],
		});
	}
}
37 changes: 35 additions & 2 deletions packages/workers-shared/asset-worker/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { mockJaegerBinding } from "../../utils/tracing";
import { Analytics } from "./analytics";
import { AssetsManifest } from "./assets-manifest";
import { applyConfigurationDefaults } from "./configuration";
import { ExperimentAnalytics } from "./experiment-analytics";
import { decodePath, getIntent, handleRequest } from "./handler";
import { getAssetWithMetadataFromKV } from "./utils/kv";
import type {
Expand Down Expand Up @@ -39,6 +40,7 @@ export type Env = {
JAEGER: JaegerTracing;

ENVIRONMENT: Environment;
EXPERIMENT_ANALYTICS: ReadyAnalytics;
ANALYTICS: ReadyAnalytics;
COLO_METADATA: ColoMetadata;
UNSAFE_PERFORMANCE: UnsafePerformanceTimer;
Expand Down Expand Up @@ -212,7 +214,38 @@ export default class extends WorkerEntrypoint<Env> {
}

/**
 * Looks up `pathname` in the assets manifest and records how long the lookup
 * took as an experiment analytics event.
 *
 * The search method is chosen per call: interpolation search is used when
 * `Math.random()` falls below `INTERPOLATION_EXPERIMENT_SAMPLE_RATE`,
 * otherwise binary search is used.
 *
 * @param pathname - The path to look up.
 * @returns The value returned by the manifest lookup for a match, or `null`.
 */
async unstable_exists(pathname: string): Promise<string | null> {
	const analytics = new ExperimentAnalytics(this.env.EXPERIMENT_ANALYTICS);
	const performance = new PerformanceTimer(this.env.UNSAFE_PERFORMANCE);

	// `Math.random()` is never below 0, so a rate of 0 keeps the interpolation
	// path switched off.
	const INTERPOLATION_EXPERIMENT_SAMPLE_RATE = 0;
	let searchMethod: "binary" | "interpolation" = "binary";
	if (Math.random() < INTERPOLATION_EXPERIMENT_SAMPLE_RATE) {
		searchMethod = "interpolation";
	}
	analytics.setData({ manifestReadMethod: searchMethod });

	if (
		this.env.COLO_METADATA &&
		this.env.VERSION_METADATA &&
		this.env.CONFIG
	) {
		analytics.setData({
			accountId: this.env.CONFIG.account_id,
			experimentName: "manifest-read-timing",
		});
	}

	const startTimeMs = performance.now();
	try {
		const assetsManifest = new AssetsManifest(this.env.ASSETS_MANIFEST);
		if (searchMethod === "interpolation") {
			return await assetsManifest.getWithInterpolationSearch(pathname);
		} else {
			return await assetsManifest.getWithBinarySearch(pathname);
		}
	} finally {
		// Runs whether the lookup returned or threw, so every call is timed and
		// reported.
		analytics.setData({ manifestReadTime: performance.now() - startTimeMs });
		analytics.write();
	}
}
}
Loading
Loading