From c375ee91e99cd9c072f2fe9b535c5cb780b5f8a0 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 6 Jun 2022 17:09:39 +0200 Subject: [PATCH] cmd/geth, core/state/snapshot: rework journal loading, implement account-check (#24765) * cmd/geth, core/state/snapshot: rework journal loading, implement account-check * core/state/snapshot, cmd/geth: polish code (#37) * core/state/snapshot: minor nits * core/state/snapshot: simplify error logic * cmd/geth: go format Co-authored-by: rjl493456442 --- cmd/geth/snapshot.go | 44 +++++ core/state/snapshot/generate_test.go | 2 +- core/state/snapshot/journal.go | 185 ++++++++++-------- core/state/snapshot/{dangling.go => utils.go} | 125 ++++++------ 4 files changed, 205 insertions(+), 151 deletions(-) rename core/state/snapshot/{dangling.go => utils.go} (55%) diff --git a/cmd/geth/snapshot.go b/cmd/geth/snapshot.go index 9ffc5918cc74..b46c8013ca9f 100644 --- a/cmd/geth/snapshot.go +++ b/cmd/geth/snapshot.go @@ -101,6 +101,18 @@ In other words, this command does the snapshot to trie conversion. Description: ` geth snapshot check-dangling-storage traverses the snap storage data, and verifies that all snapshot storage data has a corresponding account. +`, + }, + { + Name: "inspect-account", + Usage: "Check all snapshot layers for the a specific account", + ArgsUsage: "
", + Action: utils.MigrateFlags(checkAccount), + Category: "MISCELLANEOUS COMMANDS", + Flags: utils.GroupFlags(utils.NetworkFlags, utils.DatabasePathFlags), + Description: ` +geth snapshot inspect-account
checks all snapshot layers and prints out +information about the specified address. `, }, { @@ -535,3 +547,35 @@ func dumpState(ctx *cli.Context) error { "elapsed", common.PrettyDuration(time.Since(start))) return nil } + +// checkAccount iterates the snap data layers, and looks up the given account +// across all layers. +func checkAccount(ctx *cli.Context) error { + if ctx.NArg() != 1 { + return errors.New("need arg") + } + var ( + hash common.Hash + addr common.Address + ) + switch len(ctx.Args()[0]) { + case 40, 42: + addr = common.HexToAddress(ctx.Args()[0]) + hash = crypto.Keccak256Hash(addr.Bytes()) + case 64, 66: + hash = common.HexToHash(ctx.Args()[0]) + default: + return errors.New("malformed address or hash") + } + stack, _ := makeConfigNode(ctx) + defer stack.Close() + chaindb := utils.MakeChainDatabase(ctx, stack, true) + defer chaindb.Close() + start := time.Now() + log.Info("Checking difflayer journal", "address", addr, "hash", hash) + if err := snapshot.CheckJournalAccount(chaindb, hash); err != nil { + return err + } + log.Info("Checked the snapshot journalled storage", "time", common.PrettyDuration(time.Since(start))) + return nil +} diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 94caed08ad7a..40a234dcb766 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -171,7 +171,7 @@ func checkSnapRoot(t *testing.T, snap *diskLayer, trieRoot common.Hash) { t.Fatalf("snaproot: %#x != trieroot #%x", snapRoot, trieRoot) } if err := CheckDanglingStorage(snap.diskdb); err != nil { - t.Fatalf("Detected dangling storages %v", err) + t.Fatalf("Detected dangling storages: %v", err) } } diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go index 6836a574090c..80cd4eeee42a 100644 --- a/core/state/snapshot/journal.go +++ b/core/state/snapshot/journal.go @@ -108,44 +108,15 @@ func loadAndParseJournal(db ethdb.KeyValueStore, base *diskLayer) (snapshot, jou // So if there is no journal, or the journal is invalid(e.g. the journal // is not matched with disk layer; or the it's the legacy-format journal, // etc.), we just discard all diffs and try to recover them later. - journal := rawdb.ReadSnapshotJournal(db) - if len(journal) == 0 { - log.Warn("Loaded snapshot journal", "diskroot", base.root, "diffs", "missing") - return base, generator, nil - } - r := rlp.NewStream(bytes.NewReader(journal), 0) - - // Firstly, resolve the first element as the journal version - version, err := r.Uint() + var current snapshot = base + err := iterateJournal(db, func(parent common.Hash, root common.Hash, destructSet map[common.Hash]struct{}, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte) error { + current = newDiffLayer(current, root, destructSet, accountData, storageData) + return nil + }) if err != nil { - log.Warn("Failed to resolve the journal version", "error", err) return base, generator, nil } - if version != journalVersion { - log.Warn("Discarded the snapshot journal with wrong version", "required", journalVersion, "got", version) - return base, generator, nil - } - // Secondly, resolve the disk layer root, ensure it's continuous - // with disk layer. Note now we can ensure it's the snapshot journal - // correct version, so we expect everything can be resolved properly. - var root common.Hash - if err := r.Decode(&root); err != nil { - return nil, journalGenerator{}, errors.New("missing disk layer root") - } - // The diff journal is not matched with disk, discard them. - // It can happen that Geth crashes without persisting the latest - // diff journal. - if !bytes.Equal(root.Bytes(), base.root.Bytes()) { - log.Warn("Loaded snapshot journal", "diskroot", base.root, "diffs", "unmatched") - return base, generator, nil - } - // Load all the snapshot diffs from the journal - snapshot, err := loadDiffLayer(base, r) - if err != nil { - return nil, journalGenerator{}, err - } - log.Debug("Loaded snapshot journal", "diskroot", base.root, "diffhead", snapshot.Root()) - return snapshot, generator, nil + return current, generator, nil } // loadSnapshot loads a pre-existing state snapshot backed by a key-value store. @@ -218,57 +189,6 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, return snapshot, false, nil } -// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new -// diff and verifying that it can be linked to the requested parent. -func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { - // Read the next diff journal entry - var root common.Hash - if err := r.Decode(&root); err != nil { - // The first read may fail with EOF, marking the end of the journal - if err == io.EOF { - return parent, nil - } - return nil, fmt.Errorf("load diff root: %v", err) - } - var destructs []journalDestruct - if err := r.Decode(&destructs); err != nil { - return nil, fmt.Errorf("load diff destructs: %v", err) - } - destructSet := make(map[common.Hash]struct{}) - for _, entry := range destructs { - destructSet[entry.Hash] = struct{}{} - } - var accounts []journalAccount - if err := r.Decode(&accounts); err != nil { - return nil, fmt.Errorf("load diff accounts: %v", err) - } - accountData := make(map[common.Hash][]byte) - for _, entry := range accounts { - if len(entry.Blob) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that - accountData[entry.Hash] = entry.Blob - } else { - accountData[entry.Hash] = nil - } - } - var storage []journalStorage - if err := r.Decode(&storage); err != nil { - return nil, fmt.Errorf("load diff storage: %v", err) - } - storageData := make(map[common.Hash]map[common.Hash][]byte) - for _, entry := range storage { - slots := make(map[common.Hash][]byte) - for i, key := range entry.Keys { - if len(entry.Vals[i]) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that - slots[key] = entry.Vals[i] - } else { - slots[key] = nil - } - } - storageData[entry.Hash] = slots - } - return loadDiffLayer(newDiffLayer(parent, root, destructSet, accountData, storageData), r) -} - // Journal terminates any in-progress snapshot generation, also implicitly pushing // the progress into the database. func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { @@ -345,3 +265,96 @@ func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { log.Debug("Journalled diff layer", "root", dl.root, "parent", dl.parent.Root()) return base, nil } + +// journalCallback is a function which is invoked by iterateJournal, every +// time a difflayer is loaded from disk. +type journalCallback = func(parent common.Hash, root common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error + +// iterateJournal iterates through the journalled difflayers, loading them from +// the database, and invoking the callback for each loaded layer. +// The order is incremental; starting with the bottom-most difflayer, going towards +// the most recent layer. +// This method returns error either if there was some error reading from disk, +// OR if the callback returns an error when invoked. +func iterateJournal(db ethdb.KeyValueReader, callback journalCallback) error { + journal := rawdb.ReadSnapshotJournal(db) + if len(journal) == 0 { + log.Warn("Loaded snapshot journal", "diffs", "missing") + return nil + } + r := rlp.NewStream(bytes.NewReader(journal), 0) + // Firstly, resolve the first element as the journal version + version, err := r.Uint() + if err != nil { + log.Warn("Failed to resolve the journal version", "error", err) + return errors.New("failed to resolve journal version") + } + if version != journalVersion { + log.Warn("Discarded the snapshot journal with wrong version", "required", journalVersion, "got", version) + return errors.New("wrong journal version") + } + // Secondly, resolve the disk layer root, ensure it's continuous + // with disk layer. Note now we can ensure it's the snapshot journal + // correct version, so we expect everything can be resolved properly. + var parent common.Hash + if err := r.Decode(&parent); err != nil { + return errors.New("missing disk layer root") + } + if baseRoot := rawdb.ReadSnapshotRoot(db); baseRoot != parent { + log.Warn("Loaded snapshot journal", "diskroot", baseRoot, "diffs", "unmatched") + return fmt.Errorf("mismatched disk and diff layers") + } + for { + var ( + root common.Hash + destructs []journalDestruct + accounts []journalAccount + storage []journalStorage + destructSet = make(map[common.Hash]struct{}) + accountData = make(map[common.Hash][]byte) + storageData = make(map[common.Hash]map[common.Hash][]byte) + ) + // Read the next diff journal entry + if err := r.Decode(&root); err != nil { + // The first read may fail with EOF, marking the end of the journal + if errors.Is(err, io.EOF) { + return nil + } + return fmt.Errorf("load diff root: %v", err) + } + if err := r.Decode(&destructs); err != nil { + return fmt.Errorf("load diff destructs: %v", err) + } + if err := r.Decode(&accounts); err != nil { + return fmt.Errorf("load diff accounts: %v", err) + } + if err := r.Decode(&storage); err != nil { + return fmt.Errorf("load diff storage: %v", err) + } + for _, entry := range destructs { + destructSet[entry.Hash] = struct{}{} + } + for _, entry := range accounts { + if len(entry.Blob) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that + accountData[entry.Hash] = entry.Blob + } else { + accountData[entry.Hash] = nil + } + } + for _, entry := range storage { + slots := make(map[common.Hash][]byte) + for i, key := range entry.Keys { + if len(entry.Vals[i]) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that + slots[key] = entry.Vals[i] + } else { + slots[key] = nil + } + } + storageData[entry.Hash] = slots + } + if err := callback(parent, root, destructSet, accountData, storageData); err != nil { + return err + } + parent = root + } +} diff --git a/core/state/snapshot/dangling.go b/core/state/snapshot/utils.go similarity index 55% rename from core/state/snapshot/dangling.go rename to core/state/snapshot/utils.go index ca73da793f7a..fa1f216e6826 100644 --- a/core/state/snapshot/dangling.go +++ b/core/state/snapshot/utils.go @@ -18,9 +18,7 @@ package snapshot import ( "bytes" - "errors" "fmt" - "io" "time" "github.com/ethereum/go-ethereum/common" @@ -34,7 +32,7 @@ import ( // storage also has corresponding account data. func CheckDanglingStorage(chaindb ethdb.KeyValueStore) error { if err := checkDanglingDiskStorage(chaindb); err != nil { - return err + log.Error("Database check error", "err", err) } return checkDanglingMemStorage(chaindb) } @@ -75,81 +73,80 @@ func checkDanglingDiskStorage(chaindb ethdb.KeyValueStore) error { // checkDanglingMemStorage checks if there is any 'dangling' storage in the journalled // snapshot difflayers. func checkDanglingMemStorage(db ethdb.KeyValueStore) error { - var ( - start = time.Now() - journal = rawdb.ReadSnapshotJournal(db) - ) - if len(journal) == 0 { - log.Warn("Loaded snapshot journal", "diffs", "missing") + start := time.Now() + log.Info("Checking dangling journalled storage") + err := iterateJournal(db, func(pRoot, root common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error { + for accHash := range storage { + if _, ok := accounts[accHash]; !ok { + log.Error("Dangling storage - missing account", "account", fmt.Sprintf("%#x", accHash), "root", root) + } + } return nil - } - r := rlp.NewStream(bytes.NewReader(journal), 0) - // Firstly, resolve the first element as the journal version - version, err := r.Uint() + }) if err != nil { - log.Warn("Failed to resolve the journal version", "error", err) - return nil - } - if version != journalVersion { - log.Warn("Discarded the snapshot journal with wrong version", "required", journalVersion, "got", version) - return nil - } - // Secondly, resolve the disk layer root, ensure it's continuous - // with disk layer. Note now we can ensure it's the snapshot journal - // correct version, so we expect everything can be resolved properly. - var root common.Hash - if err := r.Decode(&root); err != nil { - return errors.New("missing disk layer root") - } - // The diff journal is not matched with disk, discard them. - // It can happen that Geth crashes without persisting the latest - // diff journal. - // Load all the snapshot diffs from the journal - if err := checkDanglingJournalStorage(r); err != nil { + log.Info("Failed to resolve snapshot journal", "err", err) return err } log.Info("Verified the snapshot journalled storage", "time", common.PrettyDuration(time.Since(start))) return nil } -// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new -// diff and verifying that it can be linked to the requested parent. -func checkDanglingJournalStorage(r *rlp.Stream) error { - for { - // Read the next diff journal entry - var root common.Hash - if err := r.Decode(&root); err != nil { - // The first read may fail with EOF, marking the end of the journal - if err == io.EOF { - return nil - } - return fmt.Errorf("load diff root: %v", err) +// CheckJournalAccount shows information about an account, from the disk layer and +// up through the diff layers. +func CheckJournalAccount(db ethdb.KeyValueStore, hash common.Hash) error { + // Look up the disk layer first + baseRoot := rawdb.ReadSnapshotRoot(db) + fmt.Printf("Disklayer: Root: %x\n", baseRoot) + if data := rawdb.ReadAccountSnapshot(db, hash); data != nil { + account := new(Account) + if err := rlp.DecodeBytes(data, account); err != nil { + panic(err) } - var destructs []journalDestruct - if err := r.Decode(&destructs); err != nil { - return fmt.Errorf("load diff destructs: %v", err) + fmt.Printf("\taccount.nonce: %d\n", account.Nonce) + fmt.Printf("\taccount.balance: %x\n", account.Balance) + fmt.Printf("\taccount.root: %x\n", account.Root) + fmt.Printf("\taccount.codehash: %x\n", account.CodeHash) + } + // Check storage + { + it := rawdb.NewKeyLengthIterator(db.NewIterator(append(rawdb.SnapshotStoragePrefix, hash.Bytes()...), nil), 1+2*common.HashLength) + fmt.Printf("\tStorage:\n") + for it.Next() { + slot := it.Key()[33:] + fmt.Printf("\t\t%x: %x\n", slot, it.Value()) } - var accounts []journalAccount - if err := r.Decode(&accounts); err != nil { - return fmt.Errorf("load diff accounts: %v", err) + it.Release() + } + var depth = 0 + + return iterateJournal(db, func(pRoot, root common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error { + _, a := accounts[hash] + _, b := destructs[hash] + _, c := storage[hash] + depth++ + if !a && !b && !c { + return nil } - accountData := make(map[common.Hash][]byte) - for _, entry := range accounts { - if len(entry.Blob) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that - accountData[entry.Hash] = entry.Blob - } else { - accountData[entry.Hash] = nil + fmt.Printf("Disklayer+%d: Root: %x, parent %x\n", depth, root, pRoot) + if data, ok := accounts[hash]; ok { + account := new(Account) + if err := rlp.DecodeBytes(data, account); err != nil { + panic(err) } + fmt.Printf("\taccount.nonce: %d\n", account.Nonce) + fmt.Printf("\taccount.balance: %x\n", account.Balance) + fmt.Printf("\taccount.root: %x\n", account.Root) + fmt.Printf("\taccount.codehash: %x\n", account.CodeHash) } - var storage []journalStorage - if err := r.Decode(&storage); err != nil { - return fmt.Errorf("load diff storage: %v", err) + if _, ok := destructs[hash]; ok { + fmt.Printf("\t Destructed!") } - for _, entry := range storage { - if _, ok := accountData[entry.Hash]; !ok { - log.Error("Dangling storage - missing account", "account", fmt.Sprintf("%#x", entry.Hash), "root", root) - return fmt.Errorf("dangling journal snapshot storage account %#x", entry.Hash) + if data, ok := storage[hash]; ok { + fmt.Printf("\tStorage\n") + for k, v := range data { + fmt.Printf("\t\t%x: %x\n", k, v) } } - } + return nil + }) }