Skip to content

Commit

Permalink
add(state): Track spending transaction ids by spent outpoints and rev…
Browse files Browse the repository at this point in the history
…ealed nullifiers (#8895)

* Adds new column family for [spent_out_loc] -> [spending_tx_loc] with a read method and an update to `prepare_spending_transparent_tx_ids_batch()` for maintaining it when committing blocks to the finalized state.

Adds TODOs for remaining production changes needed for issue #8837.

* add spending tx ids for spent outpoints to non-finalized chains

* adds a `spending_transaction_hash()` read fn for the new column family

* Adds a `TransactionIdForSpentOutPoint` ReadRequest and a `TransactionId` ReadResponse

* Updates snapshots, removes outdated TODOs, moves a TODO.

* Clarifies `spent_utxos` field docs, fixes an assertion

* import TypedColumnFamily from `finalized_state` instead of from the crate.

* adds db format upgrade for spent outpoints -> tx hash

* adds revealing tx ids for nullifiers in finalized and non-finalized states

* updates nullifiers column families to include revealing transaction locations in db format upgrade

* Renames new read state request to `SpendingTransactionId` and updates its type to a `Spend` enum

* refactor db format upgrade and prepare_nullifiers_batch() to use ZebraDb instead of DiskDb, checks cancel_receiver before every db operation

* Adds acceptance test for checking that the finalized state has spending transaction ids

* Adds variant docs to zebra_state::request::Spend enum

* Updates Zebra book with the latest changes to the rocks db column families

* Updates acceptance test to check non-finalized state

* adds a few log messages to the acceptance test, reduces frequency of logs for progress updates

* fixes docs lint and skips test when there is no cached state

* Avoids returning genesis coinbase tx hash when indexes are missing

* Adds `indexer` compilation feature in zebra-state and build metadata in db format version file

* stops tracking new indexes in finalized state when feature is unselected

* stops tracking new indexes in non-finalized state when indexer feature is unselected

* condenses imports

* - adds build metadata when writing db version file, if any.
- adds the build metadata to the db version file before adding indexes.
- deletes indexes when running without the `indexer` feature

* Replaces dropping cf with deleting range of all items to avoid a panic when trying to open the db with that column family.

* Fixes lint, avoids reading coinbase transactions from disk

* updates db column families table

* Document need for having an indexed cached state and use a multi-threaded tokio runtime in has_spending_transaction_ids test

* fixes call to renamed `future_blocks` test fn

* improves test logs and fixes a disk format deserialization bug

* Replaces a new expr with a previously existing constant, fixes typo
  • Loading branch information
arya2 authored Jan 27, 2025
1 parent 79fbc03 commit f0c4971
Show file tree
Hide file tree
Showing 43 changed files with 1,008 additions and 186 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5994,6 +5994,7 @@ dependencies = [
"bincode",
"chrono",
"color-eyre",
"crossbeam-channel",
"dirs",
"elasticsearch",
"futures",
Expand Down
13 changes: 13 additions & 0 deletions book/src/dev/state-db-upgrades.md
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,19 @@ We use the following rocksdb column families:
| `history_tree` | `()` | `NonEmptyHistoryTree` | Update |
| `tip_chain_value_pool` | `()` | `ValueBalance` | Update |

With the following additional modifications when compiled with the `indexer` feature:

| Column Family | Keys | Values | Changes |
| ---------------------------------- | ---------------------- | ----------------------------- | ------- |
| *Transparent* | | | |
| `tx_loc_by_spent_out_loc` | `OutputLocation` | `TransactionLocation` | Create |
| *Sprout* | | | |
| `sprout_nullifiers` | `sprout::Nullifier` | `TransactionLocation` | Create |
| *Sapling* | | | |
| `sapling_nullifiers` | `sapling::Nullifier` | `TransactionLocation` | Create |
| *Orchard* | | | |
| `orchard_nullifiers` | `orchard::Nullifier` | `TransactionLocation` | Create |

### Data Formats
[rocksdb-data-format]: #rocksdb-data-format

Expand Down
1 change: 1 addition & 0 deletions zebra-rpc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ indexer-rpcs = [
"tonic-reflection",
"prost",
"tokio-stream",
"zebra-state/indexer"
]

# Production features that activate extra dependencies, or extra features in dependencies
Expand Down
4 changes: 4 additions & 0 deletions zebra-state/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ getblocktemplate-rpcs = [
"zebra-chain/getblocktemplate-rpcs",
]

# Indexes spending transaction ids by spent outpoints and revealed nullifiers
indexer = []

# Test-only features
proptest-impl = [
"proptest",
Expand Down Expand Up @@ -63,6 +66,7 @@ regex = "1.11.0"
rlimit = "0.10.2"
rocksdb = { version = "0.22.0", default-features = false, features = ["lz4"] }
semver = "1.0.23"
crossbeam-channel = "0.5.13"
serde = { version = "1.0.215", features = ["serde_derive"] }
tempfile = "3.14.0"
thiserror = "2.0.6"
Expand Down
16 changes: 6 additions & 10 deletions zebra-state/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -431,15 +431,7 @@ pub(crate) fn database_format_version_at_path(

// The database has a version file on disk
if let Some(version) = disk_version_file {
let (minor, patch) = version
.split_once('.')
.ok_or("invalid database format version file")?;

return Ok(Some(Version::new(
major_version,
minor.parse()?,
patch.parse()?,
)));
return Ok(Some(format!("{major_version}.{version}").parse()?));
}

// There's no version file on disk, so we need to guess the version
Expand Down Expand Up @@ -508,7 +500,11 @@ pub(crate) mod hidden {
) -> Result<(), BoxError> {
let version_path = config.version_file_path(db_kind, changed_version.major, network);

let version = format!("{}.{}", changed_version.minor, changed_version.patch);
let mut version = format!("{}.{}", changed_version.minor, changed_version.patch);

if !changed_version.build.is_empty() {
version.push_str(&format!("+{}", changed_version.build));
}

// Write the version file atomically so the cache is not corrupted if Zebra shuts down or
// crashes.
Expand Down
15 changes: 10 additions & 5 deletions zebra-state/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,16 @@ const DATABASE_FORMAT_PATCH_VERSION: u64 = 0;
/// This is the version implemented by the Zebra code that's currently running,
/// the minor and patch versions on disk can be different.
pub fn state_database_format_version_in_code() -> Version {
Version::new(
DATABASE_FORMAT_VERSION,
DATABASE_FORMAT_MINOR_VERSION,
DATABASE_FORMAT_PATCH_VERSION,
)
Version {
major: DATABASE_FORMAT_VERSION,
minor: DATABASE_FORMAT_MINOR_VERSION,
patch: DATABASE_FORMAT_PATCH_VERSION,
pre: semver::Prerelease::EMPTY,
#[cfg(feature = "indexer")]
build: semver::BuildMetadata::new("indexer").expect("hard-coded value should be valid"),
#[cfg(not(feature = "indexer"))]
build: semver::BuildMetadata::EMPTY,
}
}

/// Returns the highest database version that modifies the subtree index format.
Expand Down
4 changes: 4 additions & 0 deletions zebra-state/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ pub use error::{
pub use request::{
CheckpointVerifiedBlock, HashOrHeight, ReadRequest, Request, SemanticallyVerifiedBlock,
};

#[cfg(feature = "indexer")]
pub use request::Spend;

pub use response::{KnownBlock, MinedTx, ReadResponse, Response};
pub use service::{
chain_tip::{ChainTipBlock, ChainTipChange, ChainTipSender, LatestChainTip, TipAction},
Expand Down
54 changes: 54 additions & 0 deletions zebra-state/src/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,51 @@ use crate::{
ReadResponse, Response,
};

/// Identify a spend by a transparent outpoint or revealed nullifier.
///
/// This enum implements `From` for [`transparent::OutPoint`], [`sprout::Nullifier`],
/// [`sapling::Nullifier`], and [`orchard::Nullifier`], so callers can pass any of
/// those spend identifiers directly where a `Spend` is expected.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[cfg(feature = "indexer")]
pub enum Spend {
    /// A spend identified by a [`transparent::OutPoint`].
    OutPoint(transparent::OutPoint),
    /// A spend identified by a [`sprout::Nullifier`].
    Sprout(sprout::Nullifier),
    /// A spend identified by a [`sapling::Nullifier`].
    Sapling(sapling::Nullifier),
    /// A spend identified by an [`orchard::Nullifier`].
    Orchard(orchard::Nullifier),
}

#[cfg(feature = "indexer")]
impl From<transparent::OutPoint> for Spend {
    /// Wraps a spent transparent output reference in [`Spend::OutPoint`].
    fn from(value: transparent::OutPoint) -> Self {
        Spend::OutPoint(value)
    }
}

#[cfg(feature = "indexer")]
impl From<sprout::Nullifier> for Spend {
    /// Wraps a revealed Sprout nullifier in [`Spend::Sprout`].
    fn from(value: sprout::Nullifier) -> Self {
        Spend::Sprout(value)
    }
}

#[cfg(feature = "indexer")]
impl From<sapling::Nullifier> for Spend {
    /// Wraps a revealed Sapling nullifier in [`Spend::Sapling`].
    fn from(value: sapling::Nullifier) -> Self {
        Spend::Sapling(value)
    }
}

#[cfg(feature = "indexer")]
impl From<orchard::Nullifier> for Spend {
    /// Wraps a revealed Orchard nullifier in [`Spend::Orchard`].
    fn from(value: orchard::Nullifier) -> Self {
        Spend::Orchard(value)
    }
}

/// Identify a block by hash or height.
///
/// This enum implements `From` for [`block::Hash`] and [`block::Height`],
Expand Down Expand Up @@ -1020,6 +1065,13 @@ pub enum ReadRequest {
height_range: RangeInclusive<block::Height>,
},

/// Looks up a spending transaction id by a spent transparent outpoint or revealed nullifier.
///
/// Returns [`ReadResponse::TransactionId`] with the hash of the transaction that spent the
/// output at the provided [`transparent::OutPoint`] or revealed the provided nullifier.
#[cfg(feature = "indexer")]
SpendingTransactionId(Spend),

/// Looks up utxos for the provided addresses.
///
/// Returns a type with found utxos and transaction information.
Expand Down Expand Up @@ -1106,6 +1158,8 @@ impl ReadRequest {
}
ReadRequest::BestChainNextMedianTimePast => "best_chain_next_median_time_past",
ReadRequest::BestChainBlockHash(_) => "best_chain_block_hash",
#[cfg(feature = "indexer")]
ReadRequest::SpendingTransactionId(_) => "spending_transaction_id",
#[cfg(feature = "getblocktemplate-rpcs")]
ReadRequest::ChainInfo => "chain_info",
#[cfg(feature = "getblocktemplate-rpcs")]
Expand Down
9 changes: 9 additions & 0 deletions zebra-state/src/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,12 @@ pub enum ReadResponse {
/// or `None` if the block was not found.
TransactionIdsForBlock(Option<Arc<[transaction::Hash]>>),

/// Response to [`ReadRequest::SpendingTransactionId`],
/// with the hash of the transaction that spent the outpoint or revealed the nullifier,
/// or `None` if no spending transaction was found for the provided [`Spend`].
#[cfg(feature = "indexer")]
TransactionId(Option<transaction::Hash>),

/// Response to [`ReadRequest::BlockLocator`] with a block locator object.
BlockLocator(Vec<block::Hash>),

Expand Down Expand Up @@ -343,6 +349,9 @@ impl TryFrom<ReadResponse> for Response {
Err("there is no corresponding Response for this ReadResponse")
}

#[cfg(feature = "indexer")]
ReadResponse::TransactionId(_) => Err("there is no corresponding Response for this ReadResponse"),

#[cfg(feature = "getblocktemplate-rpcs")]
ReadResponse::ValidBlockProposal => Ok(Response::ValidBlockProposal),

Expand Down
29 changes: 29 additions & 0 deletions zebra-state/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1383,6 +1383,35 @@ impl Service<ReadRequest> for ReadStateService {
.wait_for_panics()
}

#[cfg(feature = "indexer")]
ReadRequest::SpendingTransactionId(spend) => {
let state = self.clone();

tokio::task::spawn_blocking(move || {
span.in_scope(move || {
let spending_transaction_id = state
.non_finalized_state_receiver
.with_watch_data(|non_finalized_state| {
read::spending_transaction_hash(
non_finalized_state.best_chain(),
&state.db,
spend,
)
});

// The work is done in the future.
timer.finish(
module_path!(),
line!(),
"ReadRequest::SpendingTransactionId",
);

Ok(ReadResponse::TransactionId(spending_transaction_id))
})
})
.wait_for_panics()
}

ReadRequest::UnspentBestChainUtxo(outpoint) => {
let state = self.clone();

Expand Down
28 changes: 19 additions & 9 deletions zebra-state/src/service/check/nullifier.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
//! Checks for nullifier uniqueness.
use std::{collections::HashSet, sync::Arc};
use std::{collections::HashMap, sync::Arc};

use tracing::trace;
use zebra_chain::transaction::Transaction;

use crate::{
error::DuplicateNullifierError,
service::{finalized_state::ZebraDb, non_finalized_state::Chain},
service::{
finalized_state::ZebraDb,
non_finalized_state::{Chain, SpendingTransactionId},
},
SemanticallyVerifiedBlock, ValidateContextError,
};

Expand Down Expand Up @@ -105,19 +108,22 @@ pub(crate) fn tx_no_duplicates_in_chain(
find_duplicate_nullifier(
transaction.sprout_nullifiers(),
|nullifier| finalized_chain.contains_sprout_nullifier(nullifier),
non_finalized_chain.map(|chain| |nullifier| chain.sprout_nullifiers.contains(nullifier)),
non_finalized_chain
.map(|chain| |nullifier| chain.sprout_nullifiers.contains_key(nullifier)),
)?;

find_duplicate_nullifier(
transaction.sapling_nullifiers(),
|nullifier| finalized_chain.contains_sapling_nullifier(nullifier),
non_finalized_chain.map(|chain| |nullifier| chain.sapling_nullifiers.contains(nullifier)),
non_finalized_chain
.map(|chain| |nullifier| chain.sapling_nullifiers.contains_key(nullifier)),
)?;

find_duplicate_nullifier(
transaction.orchard_nullifiers(),
|nullifier| finalized_chain.contains_orchard_nullifier(nullifier),
non_finalized_chain.map(|chain| |nullifier| chain.orchard_nullifiers.contains(nullifier)),
non_finalized_chain
.map(|chain| |nullifier| chain.orchard_nullifiers.contains_key(nullifier)),
)?;

Ok(())
Expand Down Expand Up @@ -156,8 +162,9 @@ pub(crate) fn tx_no_duplicates_in_chain(
/// [5]: service::non_finalized_state::Chain
#[tracing::instrument(skip(chain_nullifiers, shielded_data_nullifiers))]
pub(crate) fn add_to_non_finalized_chain_unique<'block, NullifierT>(
chain_nullifiers: &mut HashSet<NullifierT>,
chain_nullifiers: &mut HashMap<NullifierT, SpendingTransactionId>,
shielded_data_nullifiers: impl IntoIterator<Item = &'block NullifierT>,
revealing_tx_id: SpendingTransactionId,
) -> Result<(), ValidateContextError>
where
NullifierT: DuplicateNullifierError + Copy + std::fmt::Debug + Eq + std::hash::Hash + 'block,
Expand All @@ -166,7 +173,10 @@ where
trace!(?nullifier, "adding nullifier");

// reject the nullifier if it is already present in this non-finalized chain
if !chain_nullifiers.insert(*nullifier) {
if chain_nullifiers
.insert(*nullifier, revealing_tx_id)
.is_some()
{
Err(nullifier.duplicate_nullifier_error(false))?;
}
}
Expand Down Expand Up @@ -200,7 +210,7 @@ where
/// [1]: service::non_finalized_state::Chain
#[tracing::instrument(skip(chain_nullifiers, shielded_data_nullifiers))]
pub(crate) fn remove_from_non_finalized_chain<'block, NullifierT>(
chain_nullifiers: &mut HashSet<NullifierT>,
chain_nullifiers: &mut HashMap<NullifierT, SpendingTransactionId>,
shielded_data_nullifiers: impl IntoIterator<Item = &'block NullifierT>,
) where
NullifierT: std::fmt::Debug + Eq + std::hash::Hash + 'block,
Expand All @@ -209,7 +219,7 @@ pub(crate) fn remove_from_non_finalized_chain<'block, NullifierT>(
trace!(?nullifier, "removing nullifier");

assert!(
chain_nullifiers.remove(nullifier),
chain_nullifiers.remove(nullifier).is_some(),
"nullifier must be present if block was added to chain"
);
}
Expand Down
10 changes: 5 additions & 5 deletions zebra-state/src/service/check/tests/utxo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ proptest! {
.unwrap();
prop_assert!(!chain.unspent_utxos().contains_key(&expected_outpoint));
prop_assert!(chain.created_utxos.contains_key(&expected_outpoint));
prop_assert!(chain.spent_utxos.contains(&expected_outpoint));
prop_assert!(chain.spent_utxos.contains_key(&expected_outpoint));

// the finalized state does not have the UTXO
prop_assert!(finalized_state.utxo(&expected_outpoint).is_none());
Expand Down Expand Up @@ -310,14 +310,14 @@ proptest! {
if use_finalized_state_output {
// the chain has spent the UTXO from the finalized state
prop_assert!(!chain.created_utxos.contains_key(&expected_outpoint));
prop_assert!(chain.spent_utxos.contains(&expected_outpoint));
prop_assert!(chain.spent_utxos.contains_key(&expected_outpoint));
// the finalized state has the UTXO, but it will get deleted on commit
prop_assert!(finalized_state.utxo(&expected_outpoint).is_some());
} else {
// the chain has spent its own UTXO
prop_assert!(!chain.unspent_utxos().contains_key(&expected_outpoint));
prop_assert!(chain.created_utxos.contains_key(&expected_outpoint));
prop_assert!(chain.spent_utxos.contains(&expected_outpoint));
prop_assert!(chain.spent_utxos.contains_key(&expected_outpoint));
// the finalized state does not have the UTXO
prop_assert!(finalized_state.utxo(&expected_outpoint).is_none());
}
Expand Down Expand Up @@ -650,12 +650,12 @@ proptest! {
// the finalized state has the unspent UTXO
prop_assert!(finalized_state.utxo(&expected_outpoint).is_some());
// the non-finalized state has spent the UTXO
prop_assert!(chain.spent_utxos.contains(&expected_outpoint));
prop_assert!(chain.spent_utxos.contains_key(&expected_outpoint));
} else {
// the non-finalized state has created and spent the UTXO
prop_assert!(!chain.unspent_utxos().contains_key(&expected_outpoint));
prop_assert!(chain.created_utxos.contains_key(&expected_outpoint));
prop_assert!(chain.spent_utxos.contains(&expected_outpoint));
prop_assert!(chain.spent_utxos.contains_key(&expected_outpoint));
// the finalized state does not have the UTXO
prop_assert!(finalized_state.utxo(&expected_outpoint).is_none());
}
Expand Down
Loading

0 comments on commit f0c4971

Please sign in to comment.