From 225b3c0556602e831ed691eab11ed1468cca80e1 Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Tue, 29 Aug 2023 20:16:57 -0400 Subject: [PATCH] Document in the code how this scheme works --- compiler/rustc_metadata/src/rmeta/mod.rs | 3 ++ compiler/rustc_metadata/src/rmeta/table.rs | 36 +++++++++++++--------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index 1bd86e543079e..7f022695e3730 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -136,7 +136,10 @@ impl LazyArray { /// eagerly and in-order. struct LazyTable { position: NonZeroUsize, + /// The encoded size of the elements of a table is selected at runtime to drop + /// trailing zeroes. This is the number of bytes used for each table element. width: usize, + /// How many elements are in the table. len: usize, _marker: PhantomData T>, } diff --git a/compiler/rustc_metadata/src/rmeta/table.rs b/compiler/rustc_metadata/src/rmeta/table.rs index 2f807b8083e37..d572a65d00423 100644 --- a/compiler/rustc_metadata/src/rmeta/table.rs +++ b/compiler/rustc_metadata/src/rmeta/table.rs @@ -346,6 +346,12 @@ impl LazyArray { let position = (self.position.get() as u64).to_le_bytes(); let len = (self.num_elems as u64).to_le_bytes(); + // Element width is selected at runtime on a per-table basis by omitting trailing + // zero bytes in table elements. This works very naturally when table elements are + // simple numbers but `LazyArray` is a pair of integers. If naively encoded, the second + // element would shield the trailing zeroes in the first. Interleaving the bytes + // of the position and length exposes trailing zeroes in both to the optimization. + // We encode length second because we generally expect it to be smaller. 
for i in 0..8 { b[2 * i] = position[i]; b[2 * i + 1] = len[i]; @@ -359,18 +365,26 @@ impl LazyArray { } } +// Decoding helper for the encoding scheme used by `LazyArray`. +// Interleaving the bytes of the two integers exposes trailing zero bytes in the first integer +// to the varint scheme that we use for tables. +#[inline] +fn decode_interleaved(encoded: &[u8; 16]) -> ([u8; 8], [u8; 8]) { + let mut first = [0u8; 8]; + let mut second = [0u8; 8]; + for i in 0..8 { + first[i] = encoded[2 * i]; + second[i] = encoded[2 * i + 1]; + } + (first, second) +} + impl FixedSizeEncoding for LazyArray { type ByteArray = [u8; 16]; #[inline] fn from_bytes(b: &[u8; 16]) -> Self { - let mut position = [0u8; 8]; - let mut meta = [0u8; 8]; - - for i in 0..8 { - position[i] = b[2 * i]; - meta[i] = b[2 * i + 1]; - } + let (position, meta) = decode_interleaved(b); if meta == [0; 8] { return Default::default(); @@ -390,13 +404,7 @@ impl FixedSizeEncoding for Option> { #[inline] fn from_bytes(b: &[u8; 16]) -> Self { - let mut position = [0u8; 8]; - let mut meta = [0u8; 8]; - - for i in 0..8 { - position[i] = b[2 * i]; - meta[i] = b[2 * i + 1]; - } + let (position, meta) = decode_interleaved(b); LazyArray::from_bytes_impl(&position, &meta) }