Skip to content

Commit

Permalink
Adds read/write/get_pod() fns to tiered storage (solana-labs#34415)
Browse files Browse the repository at this point in the history
  • Loading branch information
brooksprumo authored Dec 14, 2023
1 parent 5b1aa63 commit c7a5767
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 51 deletions.
70 changes: 57 additions & 13 deletions accounts-db/src/tiered_storage/byte_block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,31 @@ impl ByteBlockWriter {
self.len
}

/// Write plain ol' data to the internal buffer of the ByteBlockWriter instance
///
/// Prefer this over `write_type()`, as it prevents some undefined behavior.
pub fn write_pod<T: bytemuck::NoUninit>(&mut self, value: &T) -> IoResult<usize> {
// SAFETY: Since T is NoUninit, it does not contain any uninitialized bytes.
unsafe { self.write_type(value) }
}

/// Write the specified typed instance to the internal buffer of
/// the ByteBlockWriter instance.
pub fn write_type<T>(&mut self, value: &T) -> IoResult<usize> {
///
/// Prefer `write_pod()` when possible, because `write_type()` may cause
/// undefined behavior if `value` contains uninitialized bytes.
///
/// # Safety
///
/// Caller must ensure casting T to bytes is safe.
/// Refer to the Safety sections in std::slice::from_raw_parts()
/// and bytemuck's Pod and NoUninit for more information.
pub unsafe fn write_type<T>(&mut self, value: &T) -> IoResult<usize> {
let size = mem::size_of::<T>();
let ptr = value as *const _ as *const u8;
// SAFETY: The caller ensures that `value` contains no uninitialized bytes,
// we ensure the size is safe by querying T directly,
// and Rust ensures all values are at least byte-aligned.
let slice = unsafe { std::slice::from_raw_parts(ptr, size) };
self.write(slice)?;
Ok(size)
Expand All @@ -73,10 +93,10 @@ impl ByteBlockWriter {
) -> IoResult<usize> {
let mut size = 0;
if let Some(rent_epoch) = opt_fields.rent_epoch {
size += self.write_type(&rent_epoch)?;
size += self.write_pod(&rent_epoch)?;
}
if let Some(hash) = opt_fields.account_hash {
size += self.write_type(&hash)?;
size += self.write_pod(&hash)?;
}

debug_assert_eq!(size, opt_fields.size());
Expand Down Expand Up @@ -112,18 +132,40 @@ impl ByteBlockWriter {
/// The util struct for reading byte blocks.
pub struct ByteBlockReader;

/// Reads the raw part of the input byte_block, at the specified offset, as type T.
///
/// Returns None if `offset` + size_of::<T>() exceeds the size of the input byte_block.
///
/// Type T must be plain ol' data to ensure no undefined behavior.
pub fn read_pod<T: bytemuck::AnyBitPattern>(byte_block: &[u8], offset: usize) -> Option<&T> {
// SAFETY: Since T is AnyBitPattern, it is safe to cast bytes to T.
unsafe { read_type(byte_block, offset) }
}

/// Reads the raw part of the input byte_block at the specified offset
/// as type T.
///
/// If `offset` + size_of::<T>() exceeds the size of the input byte_block,
/// then None will be returned.
pub fn read_type<T>(byte_block: &[u8], offset: usize) -> Option<&T> {
///
/// Prefer `read_pod()` when possible, because `read_type()` may cause
/// undefined behavior.
///
/// # Safety
///
/// Caller must ensure casting bytes to T is safe.
/// Refer to the Safety sections in std::slice::from_raw_parts()
/// and bytemuck's Pod and AnyBitPattern for more information.
pub unsafe fn read_type<T>(byte_block: &[u8], offset: usize) -> Option<&T> {
let (next, overflow) = offset.overflowing_add(std::mem::size_of::<T>());
if overflow || next > byte_block.len() {
return None;
}
let ptr = byte_block[offset..].as_ptr() as *const T;
debug_assert!(ptr as usize % std::mem::align_of::<T>() == 0);
// SAFETY: The caller ensures it is safe to cast bytes to T,
// we ensure the size is safe by querying T directly,
// and we just checked above to ensure the ptr is aligned for T.
Some(unsafe { &*ptr })
}

Expand Down Expand Up @@ -169,7 +211,7 @@ mod tests {
let mut writer = ByteBlockWriter::new(format);
let value: u32 = 42;

writer.write_type(&value).unwrap();
writer.write_pod(&value).unwrap();
assert_eq!(writer.raw_len(), mem::size_of::<u32>());

let buffer = writer.finish().unwrap();
Expand Down Expand Up @@ -231,12 +273,14 @@ mod tests {
let test_data3 = [33u8; 300];

// Write the above meta and data in an interleaving way.
writer.write_type(&test_metas[0]).unwrap();
writer.write_type(&test_data1).unwrap();
writer.write_type(&test_metas[1]).unwrap();
writer.write_type(&test_data2).unwrap();
writer.write_type(&test_metas[2]).unwrap();
writer.write_type(&test_data3).unwrap();
unsafe {
writer.write_type(&test_metas[0]).unwrap();
writer.write_type(&test_data1).unwrap();
writer.write_type(&test_metas[1]).unwrap();
writer.write_type(&test_data2).unwrap();
writer.write_type(&test_metas[2]).unwrap();
writer.write_type(&test_data3).unwrap();
}
assert_eq!(
writer.raw_len(),
mem::size_of::<TestMetaStruct>() * 3
Expand Down Expand Up @@ -346,13 +390,13 @@ mod tests {
let mut offset = 0;
for opt_fields in &opt_fields_vec {
if let Some(expected_rent_epoch) = opt_fields.rent_epoch {
let rent_epoch = read_type::<Epoch>(&decoded_buffer, offset).unwrap();
let rent_epoch = read_pod::<Epoch>(&decoded_buffer, offset).unwrap();
assert_eq!(*rent_epoch, expected_rent_epoch);
verified_count += 1;
offset += std::mem::size_of::<Epoch>();
}
if let Some(expected_hash) = opt_fields.account_hash {
let hash = read_type::<AccountHash>(&decoded_buffer, offset).unwrap();
let hash = read_pod::<AccountHash>(&decoded_buffer, offset).unwrap();
assert_eq!(hash, &expected_hash);
verified_count += 1;
offset += std::mem::size_of::<AccountHash>();
Expand Down
56 changes: 49 additions & 7 deletions accounts-db/src/tiered_storage/file.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
use std::{
fs::{File, OpenOptions},
io::{Read, Result as IoResult, Seek, SeekFrom, Write},
mem,
path::Path,
use {
bytemuck::{AnyBitPattern, NoUninit},
std::{
fs::{File, OpenOptions},
io::{Read, Result as IoResult, Seek, SeekFrom, Write},
mem,
path::Path,
},
};

#[derive(Debug)]
Expand Down Expand Up @@ -33,14 +36,53 @@ impl TieredStorageFile {
))
}

pub fn write_type<T>(&self, value: &T) -> IoResult<usize> {
/// Writes `value` to the file.
///
/// `value` must be plain ol' data.
pub fn write_pod<T: NoUninit>(&self, value: &T) -> IoResult<usize> {
// SAFETY: Since T is NoUninit, it does not contain any uninitialized bytes.
unsafe { self.write_type(value) }
}

/// Writes `value` to the file.
///
/// Prefer `write_pod` when possible, because `write_value` may cause
/// undefined behavior if `value` contains uninitialized bytes.
///
/// # Safety
///
/// Caller must ensure casting T to bytes is safe.
/// Refer to the Safety sections in std::slice::from_raw_parts()
/// and bytemuck's Pod and NoUninit for more information.
pub unsafe fn write_type<T>(&self, value: &T) -> IoResult<usize> {
let ptr = value as *const _ as *const u8;
let bytes = unsafe { std::slice::from_raw_parts(ptr, mem::size_of::<T>()) };
self.write_bytes(bytes)
}

pub fn read_type<T>(&self, value: &mut T) -> IoResult<()> {
/// Reads a value of type `T` from the file.
///
/// Type T must be plain ol' data.
pub fn read_pod<T: NoUninit + AnyBitPattern>(&self, value: &mut T) -> IoResult<()> {
// SAFETY: Since T is AnyBitPattern, it is safe to cast bytes to T.
unsafe { self.read_type(value) }
}

/// Reads a value of type `T` from the file.
///
/// Prefer `read_pod()` when possible, because `read_type()` may cause
/// undefined behavior.
///
/// # Safety
///
/// Caller must ensure casting bytes to T is safe.
/// Refer to the Safety sections in std::slice::from_raw_parts()
/// and bytemuck's Pod and AnyBitPattern for more information.
pub unsafe fn read_type<T>(&self, value: &mut T) -> IoResult<()> {
let ptr = value as *mut _ as *mut u8;
// SAFETY: The caller ensures it is safe to cast bytes to T,
// we ensure the size is safe by querying T directly,
// and Rust ensures ptr is aligned.
let bytes = unsafe { std::slice::from_raw_parts_mut(ptr, mem::size_of::<T>()) };
self.read_bytes(bytes)
}
Expand Down
32 changes: 20 additions & 12 deletions accounts-db/src/tiered_storage/footer.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use {
crate::tiered_storage::{
error::TieredStorageError, file::TieredStorageFile, index::IndexBlockFormat,
mmap_utils::get_type, TieredStorageResult,
error::TieredStorageError,
file::TieredStorageFile,
index::IndexBlockFormat,
mmap_utils::{get_pod, get_type},
TieredStorageResult,
},
bytemuck::{Pod, Zeroable},
memmap2::Mmap,
Expand Down Expand Up @@ -204,8 +207,9 @@ impl TieredStorageFooter {
}

pub fn write_footer_block(&self, file: &TieredStorageFile) -> TieredStorageResult<()> {
file.write_type(self)?;
file.write_type(&TieredStorageMagicNumber::default())?;
// SAFETY: The footer does not contain any uninitialized bytes.
unsafe { file.write_type(self)? };
file.write_pod(&TieredStorageMagicNumber::default())?;

Ok(())
}
Expand All @@ -214,13 +218,13 @@ impl TieredStorageFooter {
file.seek_from_end(-(FOOTER_TAIL_SIZE as i64))?;

let mut footer_version: u64 = 0;
file.read_type(&mut footer_version)?;
file.read_pod(&mut footer_version)?;
if footer_version != FOOTER_FORMAT_VERSION {
return Err(TieredStorageError::InvalidFooterVersion(footer_version));
}

let mut footer_size: u64 = 0;
file.read_type(&mut footer_size)?;
file.read_pod(&mut footer_size)?;
if footer_size != FOOTER_SIZE as u64 {
return Err(TieredStorageError::InvalidFooterSize(
footer_size,
Expand All @@ -229,7 +233,7 @@ impl TieredStorageFooter {
}

let mut magic_number = TieredStorageMagicNumber::zeroed();
file.read_type(&mut magic_number)?;
file.read_pod(&mut magic_number)?;
if magic_number != TieredStorageMagicNumber::default() {
return Err(TieredStorageError::MagicNumberMismatch(
TieredStorageMagicNumber::default().0,
Expand All @@ -239,7 +243,9 @@ impl TieredStorageFooter {

let mut footer = Self::default();
file.seek_from_end(-(footer_size as i64))?;
file.read_type(&mut footer)?;
// SAFETY: We sanitize the footer to ensure all the bytes are
// actually safe to interpret as a TieredStorageFooter.
unsafe { file.read_type(&mut footer)? };
Self::sanitize(&footer)?;

Ok(footer)
Expand All @@ -248,20 +254,20 @@ impl TieredStorageFooter {
pub fn new_from_mmap(mmap: &Mmap) -> TieredStorageResult<&TieredStorageFooter> {
let offset = mmap.len().saturating_sub(FOOTER_TAIL_SIZE);

let (footer_version, offset) = get_type::<u64>(mmap, offset)?;
let (footer_version, offset) = get_pod::<u64>(mmap, offset)?;
if *footer_version != FOOTER_FORMAT_VERSION {
return Err(TieredStorageError::InvalidFooterVersion(*footer_version));
}

let (&footer_size, offset) = get_type::<u64>(mmap, offset)?;
let (&footer_size, offset) = get_pod::<u64>(mmap, offset)?;
if footer_size != FOOTER_SIZE as u64 {
return Err(TieredStorageError::InvalidFooterSize(
footer_size,
FOOTER_SIZE as u64,
));
}

let (magic_number, _offset) = get_type::<TieredStorageMagicNumber>(mmap, offset)?;
let (magic_number, _offset) = get_pod::<TieredStorageMagicNumber>(mmap, offset)?;
if *magic_number != TieredStorageMagicNumber::default() {
return Err(TieredStorageError::MagicNumberMismatch(
TieredStorageMagicNumber::default().0,
Expand All @@ -270,7 +276,9 @@ impl TieredStorageFooter {
}

let footer_offset = mmap.len().saturating_sub(footer_size as usize);
let (footer, _offset) = get_type::<TieredStorageFooter>(mmap, footer_offset)?;
// SAFETY: We sanitize the footer to ensure all the bytes are
// actually safe to interpret as a TieredStorageFooter.
let (footer, _offset) = unsafe { get_type::<TieredStorageFooter>(mmap, footer_offset)? };
Self::sanitize(footer)?;

Ok(footer)
Expand Down
21 changes: 12 additions & 9 deletions accounts-db/src/tiered_storage/hot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use {
},
index::{AccountOffset, IndexBlockFormat, IndexOffset},
meta::{AccountMetaFlags, AccountMetaOptionalFields, TieredAccountMeta},
mmap_utils::get_type,
mmap_utils::get_pod,
owners::{OwnerOffset, OwnersBlock},
TieredStorageError, TieredStorageFormat, TieredStorageResult,
},
Expand Down Expand Up @@ -204,7 +204,7 @@ impl TieredAccountMeta for HotAccountMeta {
.then(|| {
let offset = self.optional_fields_offset(account_block)
+ AccountMetaOptionalFields::rent_epoch_offset(self.flags());
byte_block::read_type::<Epoch>(account_block, offset).copied()
byte_block::read_pod::<Epoch>(account_block, offset).copied()
})
.flatten()
}
Expand All @@ -217,7 +217,7 @@ impl TieredAccountMeta for HotAccountMeta {
.then(|| {
let offset = self.optional_fields_offset(account_block)
+ AccountMetaOptionalFields::account_hash_offset(self.flags());
byte_block::read_type::<AccountHash>(account_block, offset)
byte_block::read_pod::<AccountHash>(account_block, offset)
})
.flatten()
}
Expand Down Expand Up @@ -283,7 +283,7 @@ impl HotStorageReader {
) -> TieredStorageResult<&HotAccountMeta> {
let internal_account_offset = account_offset.offset();

let (meta, _) = get_type::<HotAccountMeta>(&self.mmap, internal_account_offset)?;
let (meta, _) = get_pod::<HotAccountMeta>(&self.mmap, internal_account_offset)?;
Ok(meta)
}

Expand Down Expand Up @@ -452,13 +452,16 @@ pub mod tests {
.with_flags(&flags);

let mut writer = ByteBlockWriter::new(AccountBlockFormat::AlignedRaw);
writer.write_type(&expected_meta).unwrap();
writer.write_type(&account_data).unwrap();
writer.write_type(&padding).unwrap();
writer.write_pod(&expected_meta).unwrap();
// SAFETY: These values are POD, so they are safe to write.
unsafe {
writer.write_type(&account_data).unwrap();
writer.write_type(&padding).unwrap();
}
writer.write_optional_fields(&optional_fields).unwrap();
let buffer = writer.finish().unwrap();

let meta = byte_block::read_type::<HotAccountMeta>(&buffer, 0).unwrap();
let meta = byte_block::read_pod::<HotAccountMeta>(&buffer, 0).unwrap();
assert_eq!(expected_meta, *meta);
assert!(meta.flags().has_rent_epoch());
assert!(meta.flags().has_account_hash());
Expand Down Expand Up @@ -548,7 +551,7 @@ pub mod tests {
.iter()
.map(|meta| {
let prev_offset = current_offset;
current_offset += file.write_type(meta).unwrap();
current_offset += file.write_pod(meta).unwrap();
HotAccountOffset::new(prev_offset).unwrap()
})
.collect();
Expand Down
Loading

0 comments on commit c7a5767

Please sign in to comment.