Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix corrupted DB on networks where the first slot is skipped (Holesky) #4985

Merged
merged 3 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ pub fn upgrade_to_v18<T: BeaconChainTypes>(
db: Arc<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>,
log: Logger,
) -> Result<Vec<KeyValueStoreOp>, Error> {
db.heal_freezer_block_roots()?;
db.heal_freezer_block_roots_at_split()?;
db.heal_freezer_block_roots_at_genesis()?;
info!(log, "Healed freezer block roots");

// No-op, even if Deneb has already occurred. The database is probably borked in this case, but
Expand Down
98 changes: 87 additions & 11 deletions beacon_node/beacon_chain/tests/store_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use std::collections::HashSet;
use std::convert::TryInto;
use std::sync::Arc;
use std::time::Duration;
use store::chunked_vector::Chunk;
use store::metadata::{SchemaVersion, CURRENT_SCHEMA_VERSION, STATE_UPPER_LIMIT_NO_RETAIN};
use store::{
chunked_vector::{chunk_key, Field},
Expand Down Expand Up @@ -106,10 +107,10 @@ fn get_harness_generic(
harness
}

/// Tests that `store.heal_freezer_block_roots` inserts block roots between last restore point
/// Tests that `store.heal_freezer_block_roots_at_split` inserts block roots between last restore point
/// slot and the split slot.
#[tokio::test]
async fn heal_freezer_block_roots() {
async fn heal_freezer_block_roots_at_split() {
// chunk_size is hard-coded to 128
let num_blocks_produced = E::slots_per_epoch() * 20;
let db_path = tempdir().unwrap();
Expand All @@ -136,7 +137,7 @@ async fn heal_freezer_block_roots() {

// Do a heal before deleting to make sure that it doesn't break.
let last_restore_point_slot = Slot::new(16 * E::slots_per_epoch());
store.heal_freezer_block_roots().unwrap();
store.heal_freezer_block_roots_at_split().unwrap();
check_freezer_block_roots(&harness, last_restore_point_slot, split_slot);

// Delete block roots between `last_restore_point_slot` and `split_slot`.
Expand Down Expand Up @@ -164,7 +165,7 @@ async fn heal_freezer_block_roots() {
assert!(matches!(block_root_err, store::Error::NoContinuationData));

// Re-insert block roots
store.heal_freezer_block_roots().unwrap();
store.heal_freezer_block_roots_at_split().unwrap();
check_freezer_block_roots(&harness, last_restore_point_slot, split_slot);

// Run for another two epochs to check that the invariant is maintained.
Expand Down Expand Up @@ -243,7 +244,7 @@ async fn heal_freezer_block_roots_with_skip_slots() {
assert!(matches!(block_root_err, store::Error::NoContinuationData));

// heal function
store.heal_freezer_block_roots().unwrap();
store.heal_freezer_block_roots_at_split().unwrap();
check_freezer_block_roots(&harness, last_restore_point_slot, split_slot);

// Run for another two epochs to check that the invariant is maintained.
Expand All @@ -257,12 +258,87 @@ async fn heal_freezer_block_roots_with_skip_slots() {
check_iterators(&harness);
}

fn check_freezer_block_roots(
harness: &TestHarness,
last_restore_point_slot: Slot,
split_slot: Slot,
) {
for slot in (last_restore_point_slot.as_u64()..split_slot.as_u64()).map(Slot::new) {
/// Tests that `store.heal_freezer_block_roots_at_genesis` replaces 0x0 block roots between slot
/// 0 and the first non-skip slot with genesis block root.
#[tokio::test]
async fn heal_freezer_block_roots_at_genesis() {
// Run for a few epochs to ensure we're past finalization.
let num_blocks_produced = E::slots_per_epoch() * 4;
let db_path = tempdir().unwrap();
let store = get_store(&db_path);
let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT);

// Start with 2 skip slots.
harness.advance_slot();
harness.advance_slot();

harness
.extend_chain(
num_blocks_produced as usize,
BlockStrategy::OnCanonicalHead,
AttestationStrategy::AllValidators,
)
.await;

// Do a heal before deleting to make sure that it doesn't break.
store.heal_freezer_block_roots_at_genesis().unwrap();
check_freezer_block_roots(
&harness,
Slot::new(0),
Epoch::new(1).end_slot(E::slots_per_epoch()),
);

// Write 0x0 block roots at slot 1 and slot 2.
let chunk_index = 0;
let chunk_db_key = chunk_key(chunk_index);
let mut chunk =
Chunk::<Hash256>::load(&store.cold_db, DBColumn::BeaconBlockRoots, &chunk_db_key)
.unwrap()
.unwrap();

chunk.values[1] = Hash256::zero();
chunk.values[2] = Hash256::zero();

let mut ops = vec![];
chunk
.store(DBColumn::BeaconBlockRoots, &chunk_db_key, &mut ops)
.unwrap();
store.cold_db.do_atomically(ops).unwrap();

// Ensure the DB is corrupted
let block_roots = store
.forwards_block_roots_iterator_until(
Slot::new(1),
Slot::new(2),
|| unreachable!(),
&harness.chain.spec,
)
.unwrap()
.map(|r| {
println!("{r:?}");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lets delete this println before merging

r.unwrap()
})
.take(2)
.collect::<Vec<_>>();
assert_eq!(
block_roots,
vec![
(Hash256::zero(), Slot::new(1)),
(Hash256::zero(), Slot::new(2))
]
);

// Insert genesis block roots at skip slots before first block slot
store.heal_freezer_block_roots_at_genesis().unwrap();
check_freezer_block_roots(
&harness,
Slot::new(0),
Epoch::new(1).end_slot(E::slots_per_epoch()),
);
}

fn check_freezer_block_roots(harness: &TestHarness, start_slot: Slot, end_slot: Slot) {
for slot in (start_slot.as_u64()..end_slot.as_u64()).map(Slot::new) {
let (block_root, result_slot) = harness
.chain
.store
Expand Down
51 changes: 49 additions & 2 deletions beacon_node/store/src/hot_cold_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2216,7 +2216,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>

/// This function fills in missing block roots between last restore point slot and split
/// slot, if any.
pub fn heal_freezer_block_roots(&self) -> Result<(), Error> {
pub fn heal_freezer_block_roots_at_split(&self) -> Result<(), Error> {
let split = self.get_split_info();
let last_restore_point_slot = (split.slot - 1) / self.config.slots_per_restore_point
* self.config.slots_per_restore_point;
Expand Down Expand Up @@ -2245,6 +2245,53 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
Ok(())
}

pub fn heal_freezer_block_roots_at_genesis(&self) -> Result<(), Error> {
let oldest_block_slot = self.get_oldest_block_slot();
let split_slot = self.get_split_slot();

// Check if backfill has been completed AND the freezer db has data in it
if oldest_block_slot != 0 || split_slot == 0 {
return Ok(());
}

let mut block_root_iter = self.forwards_block_roots_iterator_until(
Slot::new(0),
split_slot - 1,
|| {
Err(Error::DBError {
message: "Should not require end state".to_string(),
})
},
&self.spec,
)?;

let (genesis_block_root, _) = block_root_iter.next().ok_or_else(|| Error::DBError {
message: "Genesis block root missing".to_string(),
})??;

let slots_to_fix = itertools::process_results(block_root_iter, |iter| {
iter.take_while(|(block_root, _)| block_root.is_zero())
.map(|(_, slot)| slot)
.collect::<Vec<_>>()
})?;

let Some(first_slot) = slots_to_fix.first() else {
return Ok(());
};

let mut chunk_writer =
ChunkWriter::<BlockRoots, _, _>::new(&self.cold_db, first_slot.as_usize())?;
let mut ops = vec![];
for slot in slots_to_fix {
chunk_writer.set(slot.as_usize(), genesis_block_root, &mut ops)?;
}

chunk_writer.write(&mut ops)?;
self.cold_db.do_atomically(ops)?;

Ok(())
}

/// Delete *all* states from the freezer database and update the anchor accordingly.
///
/// WARNING: this method deletes the genesis state and replaces it with the provided
Expand All @@ -2257,7 +2304,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
genesis_state: &BeaconState<E>,
) -> Result<(), Error> {
// Make sure there is no missing block roots before pruning
self.heal_freezer_block_roots()?;
self.heal_freezer_block_roots_at_split()?;

// Update the anchor to use the dummy state upper limit and disable historic state storage.
let old_anchor = self.get_anchor_info();
Expand Down