Skip to content

Commit

Permalink
Auto merge of #113542 - saethlin:adaptive-tables, r=b-naber
Browse files Browse the repository at this point in the history
Adapt table sizes to the contents

This is an implementation of rust-lang/compiler-team#666

The objective of this PR is to permit the rmeta format to accommodate larger crates that need offsets larger than a `u32` can store without compromising performance for crates that do not need such range. The second commit is a number of tiny optimization opportunities I noticed while looking at perf recordings of the first commit.

The rmeta tables need to have fixed-size elements to permit lazy random access. But the size only needs to be fixed _per table_, not per element type. This PR adds another `usize` to the table header which indicates the table element size. As each element of a table is set, we keep track of the widest encoded table value, then don't bother encoding all the unused trailing bytes on each value. When decoding table elements, we copy them to a full-width array if they are not already full-width.

`LazyArray` needs some special treatment. Most other values that are encoded in tables are indexes or offsets, and those tend to be small so we get to drop a lot of zero bytes off the end. But `LazyArray` encodes _two_ small values in a fixed-width table element: A position of the table and the length of the table. The treatment described above could trim zero bytes off the table length, but any nonzero length shields the position bytes from the optimization. To improve this, we interleave the bytes of position and length. This change is responsible for about half of the crate metadata win on many crates.

Fixes #112934 (probably)
Fixes #103607
  • Loading branch information
bors committed Aug 30, 2023
2 parents 6e8f677 + 225b3c0 commit d64c845
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 47 deletions.
14 changes: 11 additions & 3 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ impl<'a, 'tcx> Metadata<'a, 'tcx> for (CrateMetadataRef<'a>, TyCtxt<'tcx>) {
}

impl<T: ParameterizedOverTcx> LazyValue<T> {
#[inline]
fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>(self, metadata: M) -> T::Value<'tcx>
where
T::Value<'tcx>: Decodable<DecodeContext<'a, 'tcx>>,
Expand Down Expand Up @@ -294,6 +295,7 @@ unsafe impl<'a, 'tcx, T: Decodable<DecodeContext<'a, 'tcx>>> TrustedLen
}

impl<T: ParameterizedOverTcx> LazyArray<T> {
#[inline]
fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>(
self,
metadata: M,
Expand Down Expand Up @@ -360,8 +362,8 @@ impl<'a, 'tcx> DecodeContext<'a, 'tcx> {
self.read_lazy_offset_then(|pos| LazyArray::from_position_and_num_elems(pos, len))
}

fn read_lazy_table<I, T>(&mut self, len: usize) -> LazyTable<I, T> {
self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, len))
fn read_lazy_table<I, T>(&mut self, width: usize, len: usize) -> LazyTable<I, T> {
self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, width, len))
}

#[inline]
Expand Down Expand Up @@ -420,25 +422,29 @@ impl<'a, 'tcx> TyDecoder for DecodeContext<'a, 'tcx> {
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for CrateNum {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> CrateNum {
let cnum = CrateNum::from_u32(d.read_u32());
d.map_encoded_cnum_to_current(cnum)
}
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefIndex {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefIndex {
DefIndex::from_u32(d.read_u32())
}
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnIndex {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ExpnIndex {
ExpnIndex::from_u32(d.read_u32())
}
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ast::AttrId {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ast::AttrId {
let sess = d.sess.expect("can't decode AttrId without Session");
sess.parse_sess.attr_id_generator.mk_attr_id()
Expand Down Expand Up @@ -672,6 +678,7 @@ impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyValue<T> {
}

impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyArray<T> {
#[inline]
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
let len = decoder.read_usize();
if len == 0 { LazyArray::default() } else { decoder.read_lazy_array(len) }
Expand All @@ -680,8 +687,9 @@ impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyArray<T> {

impl<'a, 'tcx, I: Idx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyTable<I, T> {
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
let width = decoder.read_usize();
let len = decoder.read_usize();
decoder.read_lazy_table(len)
decoder.read_lazy_table(width, len)
}
}

Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ impl<'a, 'tcx, T> Encodable<EncodeContext<'a, 'tcx>> for LazyArray<T> {

impl<'a, 'tcx, I, T> Encodable<EncodeContext<'a, 'tcx>> for LazyTable<I, T> {
fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) {
e.emit_usize(self.encoded_size);
e.emit_usize(self.width);
e.emit_usize(self.len);
e.emit_lazy_distance(self.position);
}
}
Expand Down
11 changes: 8 additions & 3 deletions compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,11 @@ impl<T> LazyArray<T> {
/// eagerly and in-order.
struct LazyTable<I, T> {
position: NonZeroUsize,
encoded_size: usize,
/// The encoded size of the elements of a table is selected at runtime to drop
/// trailing zeroes. This is the number of bytes used for each table element.
width: usize,
/// How many elements are in the table.
len: usize,
_marker: PhantomData<fn(I) -> T>,
}

Expand All @@ -153,9 +157,10 @@ impl<I: 'static, T: ParameterizedOverTcx> ParameterizedOverTcx for LazyTable<I,
impl<I, T> LazyTable<I, T> {
fn from_position_and_encoded_size(
position: NonZeroUsize,
encoded_size: usize,
width: usize,
len: usize,
) -> LazyTable<I, T> {
LazyTable { position, encoded_size, _marker: PhantomData }
LazyTable { position, width, len, _marker: PhantomData }
}
}

Expand Down
144 changes: 104 additions & 40 deletions compiler/rustc_metadata/src/rmeta/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ impl IsDefault for u32 {
}
}

impl IsDefault for u64 {
fn is_default(&self) -> bool {
*self == 0
}
}

impl<T> IsDefault for LazyArray<T> {
fn is_default(&self) -> bool {
self.num_elems == 0
Expand Down Expand Up @@ -89,6 +95,20 @@ impl FixedSizeEncoding for u32 {
}
}

impl FixedSizeEncoding for u64 {
type ByteArray = [u8; 8];

#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
Self::from_le_bytes(*b)
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
*b = self.to_le_bytes();
}
}

macro_rules! fixed_size_enum {
($ty:ty { $(($($pat:tt)*))* }) => {
impl FixedSizeEncoding for Option<$ty> {
Expand Down Expand Up @@ -300,21 +320,21 @@ impl FixedSizeEncoding for UnusedGenericParams {
// generic `LazyValue<T>` impl, but in the general case we might not need / want
// to fit every `usize` in `u32`.
impl<T> FixedSizeEncoding for Option<LazyValue<T>> {
type ByteArray = [u8; 4];
type ByteArray = [u8; 8];

#[inline]
fn from_bytes(b: &[u8; 4]) -> Self {
let position = NonZeroUsize::new(u32::from_bytes(b) as usize)?;
fn from_bytes(b: &[u8; 8]) -> Self {
let position = NonZeroUsize::new(u64::from_bytes(b) as usize)?;
Some(LazyValue::from_position(position))
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 4]) {
fn write_to_bytes(self, b: &mut [u8; 8]) {
match self {
None => unreachable!(),
Some(lazy) => {
let position = lazy.position.get();
let position: u32 = position.try_into().unwrap();
let position: u64 = position.try_into().unwrap();
position.write_to_bytes(b)
}
}
Expand All @@ -323,55 +343,75 @@ impl<T> FixedSizeEncoding for Option<LazyValue<T>> {

impl<T> LazyArray<T> {
#[inline]
fn write_to_bytes_impl(self, b: &mut [u8; 8]) {
let ([position_bytes, meta_bytes], []) = b.as_chunks_mut::<4>() else { panic!() };

let position = self.position.get();
let position: u32 = position.try_into().unwrap();
position.write_to_bytes(position_bytes);

let len = self.num_elems;
let len: u32 = len.try_into().unwrap();
len.write_to_bytes(meta_bytes);
fn write_to_bytes_impl(self, b: &mut [u8; 16]) {
let position = (self.position.get() as u64).to_le_bytes();
let len = (self.num_elems as u64).to_le_bytes();

// Element width is selected at runtime on a per-table basis by omitting trailing
// zero bytes in table elements. This works very naturally when table elements are
// simple numbers but `LazyArray` is a pair of integers. If naively encoded, the second
// element would shield the trailing zeroes in the first. Interleaving the bytes
// of the position and length exposes trailing zeroes in both to the optimization.
// We encode length second because we generally expect it to be smaller.
for i in 0..8 {
b[2 * i] = position[i];
b[2 * i + 1] = len[i];
}
}

fn from_bytes_impl(position_bytes: &[u8; 4], meta_bytes: &[u8; 4]) -> Option<LazyArray<T>> {
let position = NonZeroUsize::new(u32::from_bytes(position_bytes) as usize)?;
let len = u32::from_bytes(meta_bytes) as usize;
fn from_bytes_impl(position: &[u8; 8], meta: &[u8; 8]) -> Option<LazyArray<T>> {
let position = NonZeroUsize::new(u64::from_bytes(&position) as usize)?;
let len = u64::from_bytes(&meta) as usize;
Some(LazyArray::from_position_and_num_elems(position, len))
}
}

// Decoding helper for the encoding scheme used by `LazyArray`.
// Interleaving the bytes of the two integers exposes trailing bytes in the first integer
// to the varint scheme that we use for tables.
#[inline]
fn decode_interleaved(encoded: &[u8; 16]) -> ([u8; 8], [u8; 8]) {
let mut first = [0u8; 8];
let mut second = [0u8; 8];
for i in 0..8 {
first[i] = encoded[2 * i];
second[i] = encoded[2 * i + 1];
}
(first, second)
}

impl<T> FixedSizeEncoding for LazyArray<T> {
type ByteArray = [u8; 8];
type ByteArray = [u8; 16];

#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() };
if *meta_bytes == [0; 4] {
fn from_bytes(b: &[u8; 16]) -> Self {
let (position, meta) = decode_interleaved(b);

if meta == [0; 8] {
return Default::default();
}
LazyArray::from_bytes_impl(position_bytes, meta_bytes).unwrap()
LazyArray::from_bytes_impl(&position, &meta).unwrap()
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
fn write_to_bytes(self, b: &mut [u8; 16]) {
assert!(!self.is_default());
self.write_to_bytes_impl(b)
}
}

impl<T> FixedSizeEncoding for Option<LazyArray<T>> {
type ByteArray = [u8; 8];
type ByteArray = [u8; 16];

#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() };
LazyArray::from_bytes_impl(position_bytes, meta_bytes)
fn from_bytes(b: &[u8; 16]) -> Self {
let (position, meta) = decode_interleaved(b);

LazyArray::from_bytes_impl(&position, &meta)
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
fn write_to_bytes(self, b: &mut [u8; 16]) {
match self {
None => unreachable!(),
Some(lazy) => lazy.write_to_bytes_impl(b),
Expand All @@ -381,13 +421,14 @@ impl<T> FixedSizeEncoding for Option<LazyArray<T>> {

/// Helper for constructing a table's serialization (also see `Table`).
pub(super) struct TableBuilder<I: Idx, T: FixedSizeEncoding> {
width: usize,
blocks: IndexVec<I, T::ByteArray>,
_marker: PhantomData<T>,
}

impl<I: Idx, T: FixedSizeEncoding> Default for TableBuilder<I, T> {
fn default() -> Self {
TableBuilder { blocks: Default::default(), _marker: PhantomData }
TableBuilder { width: 0, blocks: Default::default(), _marker: PhantomData }
}
}

Expand Down Expand Up @@ -415,40 +456,63 @@ impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]>> TableBui
// > store bit-masks of which item in each bucket is actually serialized).
let block = self.blocks.ensure_contains_elem(i, || [0; N]);
value.write_to_bytes(block);
if self.width != N {
let width = N - trailing_zeros(block);
self.width = self.width.max(width);
}
}
}

pub(crate) fn encode(&self, buf: &mut FileEncoder) -> LazyTable<I, T> {
let pos = buf.position();

let width = self.width;
for block in &self.blocks {
buf.emit_raw_bytes(block);
buf.emit_raw_bytes(&block[..width]);
}
let num_bytes = self.blocks.len() * N;

LazyTable::from_position_and_encoded_size(
NonZeroUsize::new(pos as usize).unwrap(),
num_bytes,
width,
self.blocks.len(),
)
}
}

fn trailing_zeros(x: &[u8]) -> usize {
x.iter().rev().take_while(|b| **b == 0).count()
}

impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]> + ParameterizedOverTcx>
LazyTable<I, T>
where
for<'tcx> T::Value<'tcx>: FixedSizeEncoding<ByteArray = [u8; N]>,
{
/// Given the metadata, extract out the value at a particular index (if any).
#[inline(never)]
pub(super) fn get<'a, 'tcx, M: Metadata<'a, 'tcx>>(&self, metadata: M, i: I) -> T::Value<'tcx> {
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.encoded_size);
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.len);

// Access past the end of the table returns a Default
if i.index() >= self.len {
return Default::default();
}

let start = self.position.get();
let bytes = &metadata.blob()[start..start + self.encoded_size];
let (bytes, []) = bytes.as_chunks::<N>() else { panic!() };
bytes.get(i.index()).map_or_else(Default::default, FixedSizeEncoding::from_bytes)
let width = self.width;
let start = self.position.get() + (width * i.index());
let end = start + width;
let bytes = &metadata.blob()[start..end];

if let Ok(fixed) = bytes.try_into() {
FixedSizeEncoding::from_bytes(fixed)
} else {
let mut fixed = [0u8; N];
fixed[..width].copy_from_slice(bytes);
FixedSizeEncoding::from_bytes(&fixed)
}
}

/// Size of the table in entries, including possible gaps.
pub(super) fn size(&self) -> usize {
self.encoded_size / N
self.len
}
}

0 comments on commit d64c845

Please sign in to comment.