diff --git a/Cargo.lock b/Cargo.lock index 3f42db22b6..d037c8a543 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -33,9 +33,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "bech32" @@ -185,9 +185,9 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.6" +version = "0.9.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97242a70df9b89a65d0b6df3c4bf5b9ce03c5b7309019777fbde37e7537f8762" +checksum = "c00d6d2ea26e8b151d99093005cb442fb9a37aeaca582a03ec70946f49ab5ed9" dependencies = [ "cfg-if", "crossbeam-utils", @@ -198,9 +198,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfcae03edb34f947e64acdb1c33ec169824e20657e9ecb61cef6c8c74dcb8120" +checksum = "b5e5bed1f1c269533fa816a0a5492b3545209a205ca1a54842be180eb63a16a6" dependencies = [ "cfg-if", "lazy_static", @@ -401,9 +401,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.116" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "565dbd88872dbe4cc8a46e527f26483c1d1f7afa6b884a3bd6cd893d4f98da74" +checksum = "e74d72e0f9b65b5b4ca49a346af3976df0f9c61d550727f349ecd559f251a26c" [[package]] name = "log" @@ -422,9 +422,9 @@ checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" [[package]] name = "memmap2" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe3179b85e1fd8b14447cbebadb75e45a1002f541b925f0bfec366d56a81c56d" +checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" dependencies = [ "libc", ] @@ -476,6 +476,7 @@ dependencies = [ "integer-cbrt", "integer-sqrt", "log", + "memmap2", "redb", "regex", "structopt", @@ -689,9 +690,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" +checksum = "0486718e92ec9a68fbed73bb5ef687d71103b142595b406835649bebd33f72c7" [[package]] name = "serde" @@ -722,9 +723,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.78" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d23c1ba4cf0efd44be32017709280b32d1cea5c3f1275c3b6d9e8bc54f758085" +checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" dependencies = [ "itoa 1.0.1", "ryu", @@ -816,9 +817,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" +checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" [[package]] name = "unicode-width" diff --git a/Cargo.toml b/Cargo.toml index 65644c6aa8..6268abd62e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ executable-path = "1.0.0" integer-cbrt = "0.1.2" integer-sqrt = "0.1.5" log = "0.4.14" +memmap2 = "0.5.3" redb = "0.0.4" structopt = "0.3.25" tempfile = "3.2.0" diff --git a/src/index.rs b/src/index.rs index 7e805d4e29..cf11d4cb62 100644 --- a/src/index.rs +++ b/src/index.rs @@ -6,9 +6,9 @@ pub(crate) struct Index { } impl Index { - const HASH_TO_BLOCK: &'static str = "HASH_TO_BLOCK"; const HASH_TO_CHILDREN: &'static str = "HASH_TO_CHILDREN"; const HASH_TO_HEIGHT: &'static str = "HASH_TO_HEIGHT"; + const HASH_TO_LOCATION: &'static str = "HASH_TO_LOCATION"; const HEIGHT_TO_HASH: &'static str = "HEIGHT_TO_HASH"; const OUTPOINT_TO_ORDINAL_RANGES: &'static str = "OUTPOINT_TO_ORDINAL_RANGES"; @@ -44,6 +44,8 @@ impl Index { } fn index_ranges(&self) -> Result { + log::info!("Indexing ranges…"); + let mut height = 0; while let Some(block) = self.block(height)? { let wtx = self.database.begin_write()?; @@ -160,42 +162,33 @@ impl Index { Ok(()) } + fn blockfile_path(&self, i: u64) -> PathBuf { + self.blocksdir.join(format!("blk{:05}.dat", i)) + } + fn index_blockfiles(&self) -> Result { - let mut blockfiles = 0; - loop { - match File::open(self.blocksdir.join(format!("blk{:05}.dat", blockfiles))) { - Ok(_) => {} - Err(err) => { - if err.kind() == io::ErrorKind::NotFound { - break; - } else { - return Err(err.into()); - } - } - } - blockfiles += 1; - } + let blockfiles = (0..) + .map(|i| self.blockfile_path(i)) + .take_while(|path| path.is_file()) + .count(); log::info!("Indexing {} blockfiles…", blockfiles); for i in 0.. { - let blocks = match fs::read(self.blocksdir.join(format!("blk{:05}.dat", i))) { - Ok(blocks) => blocks, - Err(err) => { - if err.kind() == io::ErrorKind::NotFound { - break; - } else { - return Err(err.into()); - } - } - }; + let path = self.blockfile_path(i); + + if !path.is_file() { + break; + } + + let blocks = unsafe { Mmap::map(&File::open(path)?)? }; let tx = self.database.begin_write()?; let mut hash_to_children: MultimapTable<[u8], [u8]> = tx.open_multimap_table(Self::HASH_TO_CHILDREN)?; - let mut hash_to_block: Table<[u8], [u8]> = tx.open_table(Self::HASH_TO_BLOCK)?; + let mut hash_to_location: Table<[u8], u64> = tx.open_table(Self::HASH_TO_LOCATION)?; let mut offset = 0; @@ -208,18 +201,15 @@ impl Index { break; } - let magic = &blocks[offset..offset + 4]; - if magic != Network::Bitcoin.magic().to_le_bytes() { - return Err(format!("Unknown magic bytes: {:?}", magic).into()); - } + let rest = &blocks[offset..]; - let len = u32::from_le_bytes(blocks[offset + 4..offset + 8].try_into()?) as usize; - let start = offset + 8; - let end = start + len; + if rest.starts_with(&[0, 0, 0, 0]) { + break; + } - let bytes = &blocks[start..end]; + let block = Self::extract_block(rest)?; - let header = BlockHeader::consensus_decode(&bytes[0..80])?; + let header = BlockHeader::consensus_decode(&block[0..80])?; let hash = header.block_hash(); if header.prev_blockhash == Default::default() { @@ -238,14 +228,14 @@ impl Index { hash_to_children.insert(&header.prev_blockhash, &hash)?; - hash_to_block.insert(&hash, bytes)?; + hash_to_location.insert(&hash, &((i as u64) << 32 | offset as u64))?; - offset = end; + offset = offset + 8 + block.len(); count += 1; } - log::info!("{}/{}: Processed {} blocks…", i + 1, blockfiles, count); + log::info!("{}/{}: Processed {} blocks…", i + 1, blockfiles + 1, count); tx.commit()?; } @@ -254,6 +244,8 @@ impl Index { } fn index_heights(&self) -> Result { + log::info!("Indexing heights…"); + let write = self.database.begin_write()?; let read = self.database.begin_read()?; @@ -289,6 +281,17 @@ impl Index { Ok(()) } + fn extract_block(blocks: &[u8]) -> Result<&[u8]> { + let magic = &blocks[0..4]; + if magic != Network::Bitcoin.magic().to_le_bytes() { + return Err(format!("Unknown magic bytes: {:?}", magic).into()); + } + + let len = u32::from_le_bytes(blocks[4..8].try_into()?) as usize; + + Ok(&blocks[8..8 + len]) + } + pub(crate) fn block(&self, height: u64) -> Result> { let tx = self.database.begin_read()?; @@ -299,14 +302,22 @@ impl Index { Some(guard) => { let hash = guard.to_value(); - let hash_to_block: ReadOnlyTable<[u8], [u8]> = tx.open_table(Self::HASH_TO_BLOCK)?; + let hash_to_location: ReadOnlyTable<[u8], u64> = tx.open_table(Self::HASH_TO_LOCATION)?; + + let location = hash_to_location + .get(hash)? + .ok_or("Could not find block location in index")? + .to_value(); + + let path = self.blockfile_path(location >> 32); + + let offset = (location & 0xFFFFFFFF) as usize; + + let blocks = unsafe { Mmap::map(&File::open(path)?)? }; + + let bytes = Self::extract_block(&blocks[offset..])?; - Ok(Some(Block::consensus_decode( - hash_to_block - .get(hash)? - .ok_or("Could not find block in index")? - .to_value(), - )?)) + Ok(Some(Block::consensus_decode(bytes)?)) } } } diff --git a/src/main.rs b/src/main.rs index 758c738f72..2c93dc5d53 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,6 +11,7 @@ use { derive_more::{Display, FromStr}, integer_cbrt::IntegerCubeRoot, integer_sqrt::IntegerSquareRoot, + memmap2::Mmap, redb::{ Database, MultimapTable, ReadOnlyMultimapTable, ReadOnlyTable, ReadableMultimapTable, ReadableTable, Table, @@ -19,8 +20,7 @@ use { cmp::Ordering, collections::VecDeque, fmt::{self, Display, Formatter}, - fs::{self, File}, - io, + fs::File, ops::{Add, AddAssign, Deref, Sub}, path::{Path, PathBuf}, process,