fix flaky test (anza-xyz#3295)
* fix flaky test
bw-solana authored and ray-kast committed Nov 27, 2024
1 parent 8148679 commit 81e351f
Showing 3 changed files with 90 additions and 8 deletions.
44 changes: 37 additions & 7 deletions local-cluster/tests/local_cluster.rs
@@ -65,7 +65,9 @@ use {
client::AsyncClient,
clock::{self, Slot, DEFAULT_TICKS_PER_SLOT, MAX_PROCESSING_AGE},
commitment_config::CommitmentConfig,
-epoch_schedule::{DEFAULT_SLOTS_PER_EPOCH, MINIMUM_SLOTS_PER_EPOCH},
epoch_schedule::{
DEFAULT_SLOTS_PER_EPOCH, MAX_LEADER_SCHEDULE_EPOCH_OFFSET, MINIMUM_SLOTS_PER_EPOCH,
},
genesis_config::ClusterType,
hard_forks::HardForks,
hash::Hash,
@@ -75,6 +77,7 @@ use {
system_program, system_transaction,
vote::state::TowerSync,
},
solana_stake_program::stake_state::NEW_WARMUP_COOLDOWN_RATE,
solana_streamer::socket::SocketAddrSpace,
solana_turbine::broadcast_stage::{
broadcast_duplicates_run::{BroadcastDuplicatesConfig, ClusterPartition},
@@ -1536,20 +1539,47 @@ fn test_wait_for_max_stake() {
solana_logger::setup_with_default(RUST_LOG_FILTER);
let validator_config = ValidatorConfig::default_for_test();
let slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH;
// Set this large enough to allow for skipped slots but still be able to
// make a root and derive the new leader schedule in time.
let stakers_slot_offset = slots_per_epoch.saturating_mul(MAX_LEADER_SCHEDULE_EPOCH_OFFSET);
// Reduce this so that we can complete the test faster by advancing through
// slots/epochs faster. But don't make it too small because it can make us
// susceptible to skipped slots and the cluster getting stuck.
let ticks_per_slot = 16;
let num_validators = 4;
let mut config = ClusterConfig {
cluster_lamports: DEFAULT_CLUSTER_LAMPORTS,
-node_stakes: vec![DEFAULT_NODE_STAKE; 4],
-validator_configs: make_identical_validator_configs(&validator_config, 4),
node_stakes: vec![DEFAULT_NODE_STAKE; num_validators],
validator_configs: make_identical_validator_configs(&validator_config, num_validators),
slots_per_epoch,
-stakers_slot_offset: slots_per_epoch,
stakers_slot_offset,
ticks_per_slot,
..ClusterConfig::default()
};
let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
let client = RpcClient::new_socket(cluster.entry_point_info.rpc().unwrap());

-assert!(client
-    .wait_for_max_stake(CommitmentConfig::default(), 33.0f32)
-    .is_ok());
let num_validators_activating_stake = num_validators - 1;
// Number of epochs it is expected to take to completely activate the stake
// for all the validators.
let num_expected_epochs = (num_validators_activating_stake as f64)
.log(1. + NEW_WARMUP_COOLDOWN_RATE)
.ceil() as u32
+ 1;
let expected_test_duration = config.poh_config.target_tick_duration
* ticks_per_slot as u32
* slots_per_epoch as u32
* num_expected_epochs;
// Make the timeout double the expected duration to provide some margin.
// Especially considering tests may be running in parallel.
let timeout = expected_test_duration * 2;
if let Err(err) = client.wait_for_max_stake_below_threshold_with_timeout(
CommitmentConfig::default(),
(100 / num_validators_activating_stake) as f32,
timeout,
) {
panic!("wait_for_max_stake failed: {:?}", err);
}
assert!(client.get_slot().unwrap() > 10);
}
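
For reference, here is a rough worked check of the timing math above. It is a standalone sketch, not part of the commit, and it assumes typical values for the constants involved (NEW_WARMUP_COOLDOWN_RATE of 0.09, MINIMUM_SLOTS_PER_EPOCH of 32, and a default PoH target tick duration of 6.25 ms); the test itself pulls the real values from the Solana crates at build time.

// Illustrative sketch only (not part of the commit): verifies the epoch and
// timeout arithmetic with assumed constant values.
fn main() {
    let new_warmup_cooldown_rate = 0.09_f64; // assumed NEW_WARMUP_COOLDOWN_RATE
    let slots_per_epoch = 32u64; // assumed MINIMUM_SLOTS_PER_EPOCH
    let ticks_per_slot = 16u64;
    let tick_ms = 6.25_f64; // assumed default PoH target_tick_duration, in ms

    // Effective stake can grow by roughly a factor of (1 + rate) per epoch, so
    // fully activating the other validators' stake takes about log_{1.09}(3)
    // epochs, plus one epoch of slack.
    let activating = 3u64; // num_validators - 1
    let num_expected_epochs = (activating as f64)
        .log(1.0 + new_warmup_cooldown_rate)
        .ceil() as u64
        + 1; // ceil(12.75) + 1 = 14

    let expected_ms = tick_ms * (ticks_per_slot * slots_per_epoch * num_expected_epochs) as f64;
    // expected ≈ 44.8 s; the test doubles this for its timeout, ≈ 89.6 s.
    println!(
        "expected ≈ {:.1} s, timeout ≈ {:.1} s",
        expected_ms / 1000.0,
        expected_ms / 500.0
    );
}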

39 changes: 38 additions & 1 deletion rpc-client/src/nonblocking/rpc_client.rs
@@ -2179,8 +2179,37 @@ impl RpcClient {
&self,
commitment: CommitmentConfig,
max_stake_percent: f32,
) -> ClientResult<()> {
self.wait_for_max_stake_below_threshold_with_timeout_helper(
commitment,
max_stake_percent,
None,
)
.await
}

pub async fn wait_for_max_stake_below_threshold_with_timeout(
&self,
commitment: CommitmentConfig,
max_stake_percent: f32,
timeout: Duration,
) -> ClientResult<()> {
self.wait_for_max_stake_below_threshold_with_timeout_helper(
commitment,
max_stake_percent,
Some(timeout),
)
.await
}

async fn wait_for_max_stake_below_threshold_with_timeout_helper(
&self,
commitment: CommitmentConfig,
max_stake_percent: f32,
timeout: Option<Duration>,
) -> ClientResult<()> {
let mut current_percent;
let start = Instant::now();
loop {
let vote_accounts = self.get_vote_accounts_with_commitment(commitment).await?;

@@ -2197,12 +2226,20 @@
current_percent = 100f32 * max as f32 / total_active_stake as f32;
if current_percent < max_stake_percent {
break;
} else if let Some(timeout) = timeout {
if start.elapsed() > timeout {
return Err(ClientErrorKind::Custom(
"timed out waiting for max stake to drop".to_string(),
)
.into());
}
}

info!(
"Waiting for stake to drop below {} current: {:.1}",
max_stake_percent, current_percent
);
-sleep(Duration::from_secs(10)).await;
sleep(Duration::from_secs(5)).await;
}
Ok(())
}
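
For the nonblocking client, the new bounded wait is used roughly as in the sketch below. This is an illustrative example, not part of the commit; the 25% threshold and the two-minute timeout are arbitrary choices, and the crate and module paths follow the usual solana-rpc-client layout.

use std::time::Duration;
use solana_rpc_client::nonblocking::rpc_client::RpcClient;
use solana_sdk::commitment_config::CommitmentConfig;

// Wait until no single validator holds more than 25% of the active stake, but
// bail out after two minutes instead of looping forever (which is what the
// unbounded wait_for_max_stake would do).
async fn wait_for_stake_to_spread(client: &RpcClient) -> Result<(), Box<dyn std::error::Error>> {
    client
        .wait_for_max_stake_below_threshold_with_timeout(
            CommitmentConfig::default(),
            25.0f32,
            Duration::from_secs(120),
        )
        .await?;
    Ok(())
}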
15 changes: 15 additions & 0 deletions rpc-client/src/rpc_client.rs
@@ -1900,6 +1900,21 @@ impl RpcClient {
self.invoke((self.rpc_client.as_ref()).wait_for_max_stake(commitment, max_stake_percent))
}

pub fn wait_for_max_stake_below_threshold_with_timeout(
&self,
commitment: CommitmentConfig,
max_stake_percent: f32,
timeout: Duration,
) -> ClientResult<()> {
self.invoke(
(self.rpc_client.as_ref()).wait_for_max_stake_below_threshold_with_timeout(
commitment,
max_stake_percent,
timeout,
),
)
}

/// Returns information about all the nodes participating in the cluster.
///
/// # RPC Reference
