From 140ee32dbfb710d87fed294d508f3092beaa493c Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Fri, 21 Jun 2024 16:03:32 +0800 Subject: [PATCH 1/2] core/state/snapshot: verify account in range at interruption point --- core/state/snapshot/disklayer.go | 23 +++++++++++++++++++++ core/state/snapshot/generate.go | 9 +-------- core/state/snapshot/snapshot.go | 34 ++++++-------------------------- 3 files changed, 30 insertions(+), 36 deletions(-) diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index f5518a204ca1..76928edf07f7 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -74,6 +74,14 @@ func (dl *diskLayer) Stale() bool { return dl.stale } +// markStale sets the stale flag as true. +func (dl *diskLayer) markStale() { + dl.lock.Lock() + defer dl.lock.Unlock() + + dl.stale = true +} + // Account directly retrieves the account associated with a particular hash in // the snapshot slim data format. func (dl *diskLayer) Account(hash common.Hash) (*types.SlimAccount, error) { @@ -175,3 +183,18 @@ func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro func (dl *diskLayer) Update(blockHash common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { return newDiffLayer(dl, blockHash, destructs, accounts, storage) } + +// stopGeneration aborts the state snapshot generation if it is currently running. +func (dl *diskLayer) stopGeneration() { + dl.lock.RLock() + generating := dl.genMarker != nil + dl.lock.RUnlock() + if !generating { + return + } + if dl.genAbort != nil { + abort := make(chan *generatorStats) + dl.genAbort <- abort + <-abort + } +} diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index d81a628c91fa..6d9e16307516 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -631,16 +631,10 @@ func generateAccounts(ctx *generatorContext, dl *diskLayer, accMarker []byte) er accMarker = nil return nil } - // Always reset the initial account range as 1 whenever recover from the - // interruption. TODO(rjl493456442) can we remove it? - var accountRange = accountCheckRange - if len(accMarker) > 0 { - accountRange = 1 - } origin := common.CopyBytes(accMarker) for { id := trie.StateTrieID(dl.root) - exhausted, last, err := dl.generateRange(ctx, id, rawdb.SnapshotAccountPrefix, snapAccount, origin, accountRange, onAccount, types.FullAccountRLP) + exhausted, last, err := dl.generateRange(ctx, id, rawdb.SnapshotAccountPrefix, snapAccount, origin, accountCheckRange, onAccount, types.FullAccountRLP) if err != nil { return err // The procedure it aborted, either by external signal or internal error. } @@ -652,7 +646,6 @@ func generateAccounts(ctx *generatorContext, dl *diskLayer, accMarker []byte) er ctx.removeStorageLeft() break } - accountRange = accountCheckRange } return nil } diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 752f4359fb85..6cc8f3831c24 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -258,24 +258,9 @@ func (t *Tree) Disable() { for _, layer := range t.layers { switch layer := layer.(type) { case *diskLayer: - - layer.lock.RLock() - generating := layer.genMarker != nil - layer.lock.RUnlock() - if !generating { - // Generator is already aborted or finished - break - } - // If the base layer is generating, abort it - if layer.genAbort != nil { - abort := make(chan *generatorStats) - layer.genAbort <- abort - <-abort - } - // Layer should be inactive now, mark it as stale - layer.lock.Lock() - layer.stale = true - layer.lock.Unlock() + layer.stopGeneration() + layer.markStale() + layer.Release() case *diffLayer: // If the layer is a simple diff, simply mark as stale @@ -730,16 +715,9 @@ func (t *Tree) Rebuild(root common.Hash) { for _, layer := range t.layers { switch layer := layer.(type) { case *diskLayer: - // If the base layer is generating, abort it and save - if layer.genAbort != nil { - abort := make(chan *generatorStats) - layer.genAbort <- abort - <-abort - } - // Layer should be inactive now, mark it as stale - layer.lock.Lock() - layer.stale = true - layer.lock.Unlock() + layer.stopGeneration() + layer.markStale() + layer.Release() case *diffLayer: // If the layer is a simple diff, simply mark as stale From 01b294880b7afc2b78662ead0b0bbd6c5bca32a2 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Fri, 6 Sep 2024 18:28:35 +0800 Subject: [PATCH 2/2] core/state/snapshot: add a todo for the potential hang --- core/state/snapshot/snapshot.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 6cc8f3831c24..d1ffb5cc2d97 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -258,6 +258,8 @@ func (t *Tree) Disable() { for _, layer := range t.layers { switch layer := layer.(type) { case *diskLayer: + // TODO this function will hang if it's called twice. Will + // fix it in the following PRs. layer.stopGeneration() layer.markStale() layer.Release() @@ -715,6 +717,8 @@ func (t *Tree) Rebuild(root common.Hash) { for _, layer := range t.layers { switch layer := layer.(type) { case *diskLayer: + // TODO this function will hang if it's called twice. Will + // fix it in the following PRs. layer.stopGeneration() layer.markStale() layer.Release()