From 914a65cea642e6278b0f134291527b416b927e8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Tue, 30 Apr 2024 11:16:44 +0200 Subject: [PATCH] fix: curio seal: Failed commit retry strategy (#11870) * ffi: improved-error-handling * curio seal: Failed commit retry strategy * use master ffi * mod tidy --- curiosrc/seal/task_porep.go | 52 +++++++++++++++++++++++++++++++++++++ extern/filecoin-ffi | 2 +- go.sum | 6 ++++- 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/curiosrc/seal/task_porep.go b/curiosrc/seal/task_porep.go index fb03ce59bb0..58e307bc020 100644 --- a/curiosrc/seal/task_porep.go +++ b/curiosrc/seal/task_porep.go @@ -3,6 +3,7 @@ package seal import ( "bytes" "context" + "strings" "github.com/ipfs/go-cid" "golang.org/x/xerrors" @@ -111,6 +112,15 @@ func (p *PoRepTask) Do(taskID harmonytask.TaskID, stillOwned func() bool) (done proof, err := p.sc.PoRepSnark(ctx, sr, sealed, unsealed, sectorParams.TicketValue, abi.InteractiveSealRandomness(rand)) if err != nil { + end, err := p.recoverErrors(ctx, sectorParams.SpID, sectorParams.SectorNumber, err) + if err != nil { + return false, xerrors.Errorf("recover errors: %w", err) + } + if end { + // done, but the error handling has stored a different than success state + return true, nil + } + return false, xerrors.Errorf("failed to compute seal proof: %w", err) } @@ -161,4 +171,46 @@ func (p *PoRepTask) Adder(taskFunc harmonytask.AddTaskFunc) { p.sp.pollers[pollerPoRep].Set(taskFunc) } +func (p *PoRepTask) recoverErrors(ctx context.Context, spid, snum int64, cerr error) (end bool, err error) { + const ( + // rust-fil-proofs error strings + // https://github.com/filecoin-project/rust-fil-proofs/blob/3f018b51b6327b135830899d237a7ba181942d7e/storage-proofs-porep/src/stacked/vanilla/proof.rs#L454C1-L463 + errstrInvalidCommD = "Invalid comm_d detected at challenge_index" + errstrInvalidCommR = "Invalid comm_r detected at challenge_index" + errstrInvalidEncoding = "Invalid encoding proof generated at layer" + ) + + if cerr == nil { + return false, xerrors.Errorf("nil error") + } + + switch { + case strings.Contains(cerr.Error(), errstrInvalidCommD): + fallthrough + case strings.Contains(cerr.Error(), errstrInvalidCommR): + // todo: it might be more optimal to just retry the Trees compute first. + // Invalid CommD/R likely indicates a problem with the data computed in that step + // For now for simplicity just retry the whole thing + fallthrough + case strings.Contains(cerr.Error(), errstrInvalidEncoding): + n, err := p.db.Exec(ctx, `UPDATE sectors_sdr_pipeline + SET after_porep = FALSE, after_sdr = FALSE, after_tree_d = FALSE, + after_tree_r = FALSE, after_tree_c = FALSE + WHERE sp_id = $1 AND sector_number = $2`, + spid, snum) + if err != nil { + return false, xerrors.Errorf("store sdr success: updating pipeline: %w", err) + } + if n != 1 { + return false, xerrors.Errorf("store sdr success: updated %d rows", n) + } + + return true, nil + + default: + // if end is false the original error will be returned by the caller + return false, nil + } +} + var _ harmonytask.TaskInterface = &PoRepTask{} diff --git a/extern/filecoin-ffi b/extern/filecoin-ffi index b715c9403fa..ed08caaf877 160000 --- a/extern/filecoin-ffi +++ b/extern/filecoin-ffi @@ -1 +1 @@ -Subproject commit b715c9403faf919e95fdc702cd651e842f18d890 +Subproject commit ed08caaf8778e1b6def83efd37fce41574214353 diff --git a/go.sum b/go.sum index 52755ba03d6..19156912f2f 100644 --- a/go.sum +++ b/go.sum @@ -305,6 +305,7 @@ github.com/filecoin-project/go-amt-ipld/v3 v3.0.0/go.mod h1:Qa95YNAbtoVCTSVtX38a github.com/filecoin-project/go-amt-ipld/v3 v3.1.0 h1:ZNJ9tEG5bE72vBWYiuh5bkxJVM3ViHNOmQ7qew9n6RE= github.com/filecoin-project/go-amt-ipld/v3 v3.1.0/go.mod h1:UjM2QhDFrrjD5s1CdnkJkat4ga+LqZBZgTMniypABRo= github.com/filecoin-project/go-amt-ipld/v4 v4.0.0/go.mod h1:gF053YQ4BIpzTNDoEwHZas7U3oAwncDVGvOHyY8oDpE= +github.com/filecoin-project/go-amt-ipld/v4 v4.2.0/go.mod h1:0eDVF7pROvxrsxvLJx+SJZXqRaXXcEPUcgb/rG0zGU4= github.com/filecoin-project/go-amt-ipld/v4 v4.3.0 h1:bY42N1gR0DqrLMCKUPzX1VhYVgXaETQm0Um4ohvyEP8= github.com/filecoin-project/go-amt-ipld/v4 v4.3.0/go.mod h1:39Ep/yBbF6xN94WevLG9qSbglBJepHa5zeEbAE1pYsc= github.com/filecoin-project/go-bitfield v0.2.0/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= @@ -354,7 +355,7 @@ github.com/filecoin-project/go-state-types v0.0.0-20201102161440-c8033295a1fc/go github.com/filecoin-project/go-state-types v0.1.0/go.mod h1:ezYnPf0bNkTsDibL/psSz5dy4B5awOJ/E7P2Saeep8g= github.com/filecoin-project/go-state-types v0.1.6/go.mod h1:UwGVoMsULoCK+bWjEdd/xLCvLAQFBC7EDT477SKml+Q= github.com/filecoin-project/go-state-types v0.1.10/go.mod h1:UwGVoMsULoCK+bWjEdd/xLCvLAQFBC7EDT477SKml+Q= -github.com/filecoin-project/go-state-types v0.11.2-0.20230712101859-8f37624fa540/go.mod h1:SyNPwTsU7I22gL2r0OAPcImvLoTVfgRwdK/Y5rR1zz8= +github.com/filecoin-project/go-state-types v0.13.1/go.mod h1:cHpOPup9H1g2T29dKHAjC2sc7/Ef5ypjuW9A3I+e9yY= github.com/filecoin-project/go-state-types v0.13.3 h1:9JPkC0E6HDtfHbaOIrFiCDzT/Z0jRTb9En4Y4Ci/b3w= github.com/filecoin-project/go-state-types v0.13.3/go.mod h1:cHpOPup9H1g2T29dKHAjC2sc7/Ef5ypjuW9A3I+e9yY= github.com/filecoin-project/go-statemachine v0.0.0-20200925024713-05bd7c71fbfe/go.mod h1:FGwQgZAt2Gh5mjlwJUlVB62JeYdo+if0xWxSEfBD9ig= @@ -953,6 +954,7 @@ github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6K github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/koalacxr/quantile v0.0.1 h1:wAW+SQ286Erny9wOjVww96t8ws+x5Zj6AKHDULUK+o0= @@ -1681,6 +1683,7 @@ github.com/whyrusleeping/cbor-gen v0.0.0-20200826160007-0b9f6c5fb163/go.mod h1:f github.com/whyrusleeping/cbor-gen v0.0.0-20210118024343-169e9d70c0c2/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= github.com/whyrusleeping/cbor-gen v0.0.0-20210303213153-67a261a1d291/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= github.com/whyrusleeping/cbor-gen v0.0.0-20220323183124-98fa8256a799/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= +github.com/whyrusleeping/cbor-gen v0.1.0/go.mod h1:pM99HXyEbSQHcosHc0iW7YFmwnscr+t9Te4ibko05so= github.com/whyrusleeping/cbor-gen v0.1.1 h1:eKfcJIoxivjMtwfCfmJAqSF56MHcWqyIScXwaC1VBgw= github.com/whyrusleeping/cbor-gen v0.1.1/go.mod h1:pM99HXyEbSQHcosHc0iW7YFmwnscr+t9Te4ibko05so= github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f h1:jQa4QT2UP9WYv2nzyawpKMOCl+Z/jW7djv2/J50lj9E= @@ -2065,6 +2068,7 @@ golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220708085239-5a0f0661e09d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=