From d14ca6d9e28ee4d8d0342a1c2ed06162e2c51767 Mon Sep 17 00:00:00 2001 From: Alin Dima Date: Wed, 17 Jul 2024 12:27:11 +0300 Subject: [PATCH] add elastic scaling MVP guide (#4663) Resolves https://github.com/paritytech/polkadot-sdk/issues/4468 Gives instructions on how to enable elastic scaling MVP to parachain teams. Still a draft because it depends on further changes we make to the slot-based collator: https://github.com/paritytech/polkadot-sdk/pull/4097 Parachains cannot use this yet because the collator was not released and no relay chain network has been configured for elastic scaling yet --- Cargo.lock | 1 + cumulus/polkadot-parachain/Cargo.toml | 1 + cumulus/polkadot-parachain/src/service.rs | 71 ++++++--- .../src/guides/enable_elastic_scaling_mvp.rs | 142 ++++++++++++++++++ docs/sdk/src/guides/mod.rs | 3 + prdoc/pr_4663.prdoc | 14 ++ 6 files changed, 215 insertions(+), 17 deletions(-) create mode 100644 docs/sdk/src/guides/enable_elastic_scaling_mvp.rs create mode 100644 prdoc/pr_4663.prdoc diff --git a/Cargo.lock b/Cargo.lock index 2e5a50ef2e1d..cc728a0828cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13766,6 +13766,7 @@ dependencies = [ "cumulus-primitives-aura", "cumulus-primitives-core", "cumulus-relay-chain-interface", + "docify", "frame-benchmarking", "frame-benchmarking-cli", "frame-support", diff --git a/cumulus/polkadot-parachain/Cargo.toml b/cumulus/polkadot-parachain/Cargo.toml index b1eebb04069d..3ea2a3610794 100644 --- a/cumulus/polkadot-parachain/Cargo.toml +++ b/cumulus/polkadot-parachain/Cargo.toml @@ -24,6 +24,7 @@ hex-literal = { workspace = true, default-features = true } log = { workspace = true, default-features = true } serde = { features = ["derive"], workspace = true, default-features = true } serde_json = { workspace = true, default-features = true } +docify = { workspace = true } # Local rococo-parachain-runtime = { workspace = true } diff --git a/cumulus/polkadot-parachain/src/service.rs b/cumulus/polkadot-parachain/src/service.rs index f5f6189d1f0d..6a6cf15635e0 100644 --- a/cumulus/polkadot-parachain/src/service.rs +++ b/cumulus/polkadot-parachain/src/service.rs @@ -15,13 +15,16 @@ // along with Cumulus. If not, see . use cumulus_client_cli::{CollatorOptions, ExportGenesisHeadCommand}; -use cumulus_client_collator::service::CollatorService; -use cumulus_client_consensus_aura::collators::{ - lookahead::{self as aura, Params as AuraParams}, - slot_based::{self as slot_based, Params as SlotBasedParams}, +use cumulus_client_collator::service::{ + CollatorService, ServiceInterface as CollatorServiceInterface, +}; +use cumulus_client_consensus_aura::collators::lookahead::{self as aura, Params as AuraParams}; +#[docify::export(slot_based_colator_import)] +use cumulus_client_consensus_aura::collators::slot_based::{ + self as slot_based, Params as SlotBasedParams, }; use cumulus_client_consensus_common::ParachainBlockImport as TParachainBlockImport; -use cumulus_client_consensus_proposer::Proposer; +use cumulus_client_consensus_proposer::{Proposer, ProposerInterface}; use cumulus_client_consensus_relay_chain::Verifier as RelayChainVerifier; #[allow(deprecated)] use cumulus_client_service::old_consensus; @@ -62,6 +65,7 @@ use sc_sysinfo::HwBench; use sc_telemetry::{Telemetry, TelemetryHandle, TelemetryWorker, TelemetryWorkerHandle}; use sc_transaction_pool::FullPool; use sp_api::ProvideRuntimeApi; +use sp_inherents::CreateInherentDataProviders; use sp_keystore::KeystorePtr; use sp_runtime::{app_crypto::AppCrypto, traits::Header as HeaderT}; use std::{marker::PhantomData, pin::Pin, sync::Arc, time::Duration}; @@ -623,6 +627,48 @@ pub(crate) struct StartSlotBasedAuraConsensus( PhantomData<(RuntimeApi, AuraId)>, ); +impl StartSlotBasedAuraConsensus +where + RuntimeApi: ConstructNodeRuntimeApi>, + RuntimeApi::RuntimeApi: AuraRuntimeApi, + AuraId: AuraIdT + Sync, +{ + #[docify::export_content] + fn launch_slot_based_collator( + params: SlotBasedParams< + ParachainBlockImport, + CIDP, + ParachainClient, + ParachainBackend, + Arc, + CHP, + Proposer, + CS, + >, + task_manager: &TaskManager, + ) where + CIDP: CreateInherentDataProviders + 'static, + CIDP::InherentDataProviders: Send, + CHP: cumulus_client_consensus_common::ValidationCodeHashProvider + Send + 'static, + Proposer: ProposerInterface + Send + Sync + 'static, + CS: CollatorServiceInterface + Send + Sync + Clone + 'static, + { + let (collation_future, block_builder_future) = + slot_based::run::::Pair, _, _, _, _, _, _, _, _>(params); + + task_manager.spawn_essential_handle().spawn( + "collation-task", + Some("parachain-block-authoring"), + collation_future, + ); + task_manager.spawn_essential_handle().spawn( + "block-builder-task", + Some("parachain-block-authoring"), + block_builder_future, + ); + } +} + impl StartConsensus for StartSlotBasedAuraConsensus where @@ -683,19 +729,10 @@ where slot_drift: Duration::from_secs(1), }; - let (collation_future, block_builder_future) = - slot_based::run::::Pair, _, _, _, _, _, _, _, _>(params); + // We have a separate function only to be able to use `docify::export` on this piece of + // code. + Self::launch_slot_based_collator(params, task_manager); - task_manager.spawn_essential_handle().spawn( - "collation-task", - Some("parachain-block-authoring"), - collation_future, - ); - task_manager.spawn_essential_handle().spawn( - "block-builder-task", - Some("parachain-block-authoring"), - block_builder_future, - ); Ok(()) } } diff --git a/docs/sdk/src/guides/enable_elastic_scaling_mvp.rs b/docs/sdk/src/guides/enable_elastic_scaling_mvp.rs new file mode 100644 index 000000000000..bc4f36c271fe --- /dev/null +++ b/docs/sdk/src/guides/enable_elastic_scaling_mvp.rs @@ -0,0 +1,142 @@ +//! # Enable elastic scaling MVP for a parachain +//! +//!
This guide assumes full familiarity with Asynchronous Backing and its +//! terminology, as defined in https://wiki.polkadot.network/docs/maintain-guides-async-backing. +//! Furthermore, the parachain should have already been upgraded according to the guide.
+//! +//! ## Quick introduction to elastic scaling +//! +//! [Elastic scaling](https://polkadot.network/blog/elastic-scaling-streamling-growth-on-polkadot) +//! is a feature that will enable parachains to seamlessly scale up/down the number of used cores. +//! This can be desirable in order to increase the compute or storage throughput of a parachain or +//! to lower the latency between a transaction being submitted and it getting built in a parachain +//! block. +//! +//! At present, with Asynchronous Backing enabled, a parachain can only include a block on the relay +//! chain every 6 seconds, irregardless of how many cores the parachain acquires. Elastic scaling +//! builds further on the 10x throughput increase of Async Backing, enabling collators to submit up +//! to 3 parachain blocks per relay chain block, resulting in a further 3x throughput increase. +//! +//! ## Current limitations of the MVP +//! +//! The full implementation of elastic scaling spans across the entire relay/parachain stack and is +//! still [work in progress](https://github.com/paritytech/polkadot-sdk/issues/1829). +//! The MVP is still considered experimental software, so stability is not guaranteed. +//! If you encounter any problems, +//! [please open an issue](https://github.com/paritytech/polkadot-sdk/issues). +//! Below are described the current limitations of the MVP: +//! +//! 1. **Limited core count**. Parachain block authoring is sequential, so the second block will +//! start being built only after the previous block is imported. The current block production is +//! capped at 2 seconds of execution. Therefore, assuming the full 2 seconds are used, a +//! parachain can only utilise at most 3 cores in a relay chain slot of 6 seconds. If the full +//! execution time is not being used, higher core counts can be achieved. +//! 2. **Single collator requirement for consistently scaling beyond a core at full authorship +//! duration of 2 seconds per block.** Using the current implementation with multiple collators +//! adds additional latency to the block production pipeline. Assuming block execution takes +//! about the same as authorship, the additional overhead is equal the duration of the authorship +//! plus the block announcement. Each collator must first import the previous block before +//! authoring a new one, so it is clear that the highest throughput can be achieved using a +//! single collator. Experiments show that the peak performance using more than one collator +//! (measured up to 10 collators) is utilising 2 cores with authorship time of 1.3 seconds per +//! block, which leaves 400ms for networking overhead. This would allow for 2.6 seconds of +//! execution, compared to the 2 seconds async backing enabled. +//! [More experiments](https://github.com/paritytech/polkadot-sdk/issues/4696) are being +//! conducted in this space. +//! 3. **Trusted collator set.** The collator set needs to be trusted until there’s a mitigation +//! that would prevent or deter multiple collators from submitting the same collation to multiple +//! backing groups. A solution is being discussed +//! [here](https://github.com/polkadot-fellows/RFCs/issues/92). +//! 4. **Fixed scaling.** For true elasticity, the parachain must be able to seamlessly acquire or +//! sell coretime as the user demand grows and shrinks over time, in an automated manner. This is +//! currently lacking - a parachain can only scale up or down by “manually” acquiring coretime. +//! This is not in the scope of the relay chain functionality. Parachains can already start +//! implementing such autoscaling, but we aim to provide a framework/examples for developing +//! autoscaling strategies. +//! +//! Another hard limitation that is not envisioned to ever be lifted is that parachains which create +//! forks will generally not be able to utilise the full number of cores they acquire. +//! +//! ## Using elastic scaling MVP +//! +//! ### Prerequisites +//! +//! - Ensure Asynchronous Backing is enabled on the network and you have enabled it on the parachain +//! using [`crate::guides::async_backing_guide`]. +//! - Ensure the `AsyncBackingParams.max_candidate_depth` value is configured to a value that is at +//! least double the maximum targeted parachain velocity. For example, if the parachain will build +//! at most 3 candidates per relay chain block, the `max_candidate_depth` should be at least 6. +//! - Use a trusted single collator for maximum throughput. +//! - Ensure enough coretime is assigned to the parachain. For maximum throughput the upper bound is +//! 3 cores. +//! +//!
Phase 1 is not needed if using the `polkadot-parachain` binary built +//! from the latest polkadot-sdk release! Simply pass the `--experimental-use-slot-based` parameter +//! to the command line and jump to Phase 2.
+//! +//! The following steps assume using the cumulus parachain template. +//! +//! ### Phase 1 - (For custom parachain node) Update Parachain Node +//! +//! This assumes you are using +//! [the latest parachain template](https://github.com/paritytech/polkadot-sdk/tree/master/templates/parachain). +//! +//! This phase consists of plugging in the new slot-based collator. +//! +//! 1. In `node/src/service.rs` import the slot based collator instead of the lookahead collator. +#![doc = docify::embed!("../../cumulus/polkadot-parachain/src/service.rs", slot_based_colator_import)] +//! +//! 2. In `start_consensus()` +//! - Remove the `overseer_handle` param (also remove the +//! `OverseerHandle` type import if it’s not used elsewhere). +//! - Rename `AuraParams` to `SlotBasedParams`, remove the `overseer_handle` field and add a +//! `slot_drift` field with a value of `Duration::from_secs(1)`. +//! - Replace the single future returned by `aura::run` with the two futures returned by it and +//! spawn them as separate tasks: +#![doc = docify::embed!("../../cumulus/polkadot-parachain/src/service.rs", launch_slot_based_collator)] +//! +//! 3. In `start_parachain_node()` remove the `overseer_handle` param passed to `start_consensus`. +//! +//! ### Phase 2 - Activate fixed factor scaling in the runtime +//! +//! This phase consists of a couple of changes needed to be made to the parachain’s runtime in order +//! to utilise fixed factor scaling. +//! +//! First of all, you need to decide the upper limit to how many parachain blocks you need to +//! produce per relay chain block (in direct correlation with the number of acquired cores). This +//! should be either 1 (no scaling), 2 or 3. This is called the parachain velocity. +//! +//! If you configure a velocity which is different from the number of assigned cores, the measured +//! velocity in practice will be the minimum of these two. +//! +//! The chosen velocity will also be used to compute: +//! - The slot duration, by dividing the 6000 ms duration of the relay chain slot duration by the +//! velocity. +//! - The unincluded segment capacity, by multiplying the velocity with 2 and adding 1 to +//! it. +//! +//! Let’s assume a desired maximum velocity of 3 parachain blocks per relay chain block. The needed +//! changes would all be done in `runtime/src/lib.rs`: +//! +//! 1. Rename `BLOCK_PROCESSING_VELOCITY` to `MAX_BLOCK_PROCESSING_VELOCITY` and increase it to the +//! desired value. In this example, 3. +//! +//! ```ignore +//! const MAX_BLOCK_PROCESSING_VELOCITY: u32 = 3; +//! ``` +//! +//! 2. Set the `MILLISECS_PER_BLOCK` to the desired value. +//! +//! ```ignore +//! const MILLISECS_PER_BLOCK: u32 = +//! RELAY_CHAIN_SLOT_DURATION_MILLIS / MAX_BLOCK_PROCESSING_VELOCITY; +//! ``` +//! Note: for a parachain which measures time in terms of its own block number, changing block +//! time may cause complications, requiring additional changes. See here more information: +//! [`crate::guides::async_backing_guide#timing-by-block-number`]. +//! +//! 3. Increase the `UNINCLUDED_SEGMENT_CAPACITY` to the desired value. +//! +//! ```ignore +//! const UNINCLUDED_SEGMENT_CAPACITY: u32 = 2 * MAX_BLOCK_PROCESSING_VELOCITY + 1; +//! ``` diff --git a/docs/sdk/src/guides/mod.rs b/docs/sdk/src/guides/mod.rs index 8296ed447e14..9384f4c82ab3 100644 --- a/docs/sdk/src/guides/mod.rs +++ b/docs/sdk/src/guides/mod.rs @@ -41,3 +41,6 @@ pub mod async_backing_guide; /// How to enable metadata hash verification in the runtime. pub mod enable_metadata_hash; + +/// How to enable elastic scaling MVP on a parachain. +pub mod enable_elastic_scaling_mvp; diff --git a/prdoc/pr_4663.prdoc b/prdoc/pr_4663.prdoc new file mode 100644 index 000000000000..74b1274828d5 --- /dev/null +++ b/prdoc/pr_4663.prdoc @@ -0,0 +1,14 @@ +# Schema: Polkadot SDK PRDoc Schema (prdoc) v1.0.0 +# See doc at https://raw.githubusercontent.com/paritytech/polkadot-sdk/master/prdoc/schema_user.json + +title: Add elastic scaling MVP guide + +doc: + - audience: Node Operator + description: | + Adds a guide for parachains that want to use the experimental elastic scaling MVP. + Will be viewable at: https://paritytech.github.io/polkadot-sdk/master/polkadot_sdk_docs/guides/enable_elastic_scaling_mvp/index.html + +crates: + - name: polkadot-parachain-bin + bump: none