Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Fix/6041 bug #6053

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 63 additions & 26 deletions activation/activation.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package activation

import (
"bytes"
"context"
"errors"
"fmt"
Expand Down Expand Up @@ -39,12 +40,13 @@

// PoetConfig is the configuration to interact with the poet server.
type PoetConfig struct {
PhaseShift time.Duration `mapstructure:"phase-shift"`
CycleGap time.Duration `mapstructure:"cycle-gap"`
GracePeriod time.Duration `mapstructure:"grace-period"`
RequestTimeout time.Duration `mapstructure:"poet-request-timeout"`
RequestRetryDelay time.Duration `mapstructure:"retry-delay"`
MaxRequestRetries int `mapstructure:"retry-max"`
PhaseShift time.Duration `mapstructure:"phase-shift"`
CycleGap time.Duration `mapstructure:"cycle-gap"`
GracePeriod time.Duration `mapstructure:"grace-period"`
RequestTimeout time.Duration `mapstructure:"poet-request-timeout"`
RequestRetryDelay time.Duration `mapstructure:"retry-delay"`
PositioningATXSelectionTimeout time.Duration `mapstructure:"positioning-atx-selection-timeout"`
MaxRequestRetries int `mapstructure:"retry-max"`
}

func DefaultPoetConfig() PoetConfig {
Expand Down Expand Up @@ -203,6 +205,7 @@
for _, opt := range opts {
opt(b)
}

return b
}

Expand Down Expand Up @@ -548,7 +551,7 @@
}
publish := current + 1
metrics.PublishOntimeWindowLatency.Observe(until.Seconds())
wait := buildNipostChallengeStartDeadline(b.poetRoundStart(current), b.poetCfg.GracePeriod)
wait := b.poetRoundStart(current).Add(-b.poetCfg.GracePeriod)
fasmat marked this conversation as resolved.
Show resolved Hide resolved
if time.Until(wait) > 0 {
logger.Info("paused building NiPoST challenge. Waiting until closer to poet start to get a better posATX",
zap.Duration("till poet round", until),
Expand Down Expand Up @@ -585,6 +588,7 @@
}
return nil, fmt.Errorf("initial POST is invalid: %w", err)
}

posAtx, err := b.getPositioningAtx(ctx, nodeID, publish, nil)
if err != nil {
return nil, fmt.Errorf("failed to get positioning ATX: %w", err)
Expand All @@ -604,7 +608,6 @@
case err != nil:
return nil, fmt.Errorf("get last ATX: %w", err)
default:
// regular ATX challenge
posAtx, err := b.getPositioningAtx(ctx, nodeID, publish, prevAtx)
if err != nil {
return nil, fmt.Errorf("failed to get positioning ATX: %w", err)
Expand Down Expand Up @@ -849,10 +852,13 @@
ctx context.Context,
nodeID types.NodeID,
publish types.EpochID,
previous *types.ActivationTx,
) (types.ATXID, error) {
logger := b.logger.With(log.ZShortStringer("smesherID", nodeID), zap.Uint32("publish epoch", publish.Uint32()))

b.posAtxFinder.finding.Lock()
defer b.posAtxFinder.finding.Unlock()

if found := b.posAtxFinder.found; found != nil && found.forPublish == publish {
logger.Debug("using cached positioning atx", log.ZShortStringer("atx_id", found.id))
return found.id, nil
Expand All @@ -862,11 +868,13 @@
if err != nil {
return types.EmptyATXID, fmt.Errorf("get latest epoch: %w", err)
}

logger.Info("searching for positioning atx", zap.Uint32("latest_epoch", latestPublished.Uint32()))

// positioning ATX publish epoch must be lower than the publish epoch of built ATX
positioningAtxPublished := min(latestPublished, publish-1)
id, err := findFullyValidHighTickAtx(
ctx,
context.WithValue(ctx, prioritizedVerifyCall, true),
fasmat marked this conversation as resolved.
Show resolved Hide resolved
b.atxsdata,
positioningAtxPublished,
b.conf.GoldenATXID,
Expand All @@ -877,9 +885,15 @@
VerifyChainOpts.WithLogger(b.logger),
)
if err != nil {
logger.Info("search failed - using golden atx as positioning atx", zap.Error(err))
id = b.conf.GoldenATXID
if previous != nil {
acud marked this conversation as resolved.
Show resolved Hide resolved
id = previous.ID()
poszu marked this conversation as resolved.
Show resolved Hide resolved
logger.Info("search failed - using previous atx as positioning atx", zap.Error(err))
} else {
id = b.conf.GoldenATXID
logger.Info("search failed - using golden atx as positioning atx", zap.Error(err))
}
}

b.posAtxFinder.found = &struct {
id types.ATXID
forPublish types.EpochID
Expand All @@ -897,20 +911,23 @@
publish types.EpochID,
previous *types.ActivationTx,
) (types.ATXID, error) {
id, err := b.searchPositioningAtx(ctx, nodeID, publish)
ctxWithTimeout := ctx
var cancel context.CancelFunc

if b.poetCfg.PositioningATXSelectionTimeout > 0 {
ctxWithTimeout, cancel = context.WithTimeout(ctx, b.poetCfg.PositioningATXSelectionTimeout)
defer cancel()

Check warning on line 919 in activation/activation.go

View check run for this annotation

Codecov / codecov/patch

activation/activation.go#L918-L919

Added lines #L918 - L919 were not covered by tests
}
fasmat marked this conversation as resolved.
Show resolved Hide resolved

id, err := b.searchPositioningAtx(ctxWithTimeout, nodeID, publish, previous)
if err != nil {
return types.EmptyATXID, err
}

if previous != nil {
switch {
case id == b.conf.GoldenATXID:
id = previous.ID()
case id != b.conf.GoldenATXID:
if candidate, err := atxs.Get(b.db, id); err == nil {
if previous.TickHeight() >= candidate.TickHeight() {
id = previous.ID()
}
if previous != nil && !bytes.Equal(id.Bytes(), previous.ID().Bytes()) {
fasmat marked this conversation as resolved.
Show resolved Hide resolved
if candidate, err := atxs.Get(b.db, id); err == nil {
ConvallariaMaj marked this conversation as resolved.
Show resolved Hide resolved
if previous.TickHeight() >= candidate.TickHeight() {
id = previous.ID()
}
}
}
Expand Down Expand Up @@ -965,9 +982,24 @@
logger *zap.Logger,
opts ...VerifyChainOption,
) (types.ATXID, error) {
var found *types.ATXID
atxdata.IterateHighTicksInEpoch(publish+1, func(id types.ATXID) bool {
var (
found *types.ATXID
emptyAtx types.ATXID
ctxErr error
)

// iterate trough epochs, to get first valid, not malicious ATX with the biggest height
atxdata.IterateHighTicksInEpoch(publish+1, func(id types.ATXID) (contSearch bool) {
logger.Info("found candidate for high-tick atx", log.ZShortStringer("id", id))

if ctx.Err() != nil {
logger.Info("got error in context", log.ZShortStringer("id", id))
fasmat marked this conversation as resolved.
Show resolved Hide resolved
ctxErr = ctx.Err()
return false
}

// verify ATX-candidate by getting their dependencies (previous Atx, positioning ATX etc.)
// and verifying PoST for every dependency
if err := validator.VerifyChain(ctx, id, goldenATXID, opts...); err != nil {
logger.Info("rejecting candidate for high-tick atx", zap.Error(err), log.ZShortStringer("id", id))
return true
Expand All @@ -976,8 +1008,13 @@
return false
})

if found != nil {
return *found, nil
if ctxErr != nil {
return emptyAtx, ctxErr
}
fasmat marked this conversation as resolved.
Show resolved Hide resolved
return types.ATXID{}, ErrNotFound

if found == nil {
return emptyAtx, ErrNotFound
}
fasmat marked this conversation as resolved.
Show resolved Hide resolved

return *found, nil
}
41 changes: 40 additions & 1 deletion activation/activation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1456,7 +1456,7 @@ func TestGetPositioningAtx(t *testing.T) {
prev.SetID(types.RandomATXID())

tab.mValidator.EXPECT().VerifyChain(gomock.Any(), atxInDb.ID(), tab.goldenATXID, gomock.Any())
found, err := tab.searchPositioningAtx(context.Background(), types.EmptyNodeID, 99)
found, err := tab.searchPositioningAtx(context.Background(), types.EmptyNodeID, 99, prev)
require.NoError(t, err)
require.Equal(t, atxInDb.ID(), found)

Expand All @@ -1471,6 +1471,45 @@ func TestGetPositioningAtx(t *testing.T) {
require.NoError(t, err)
require.Equal(t, prev.ID(), selected)
})
t.Run("prefers own previous or golded when positioning ATX selection timout expired", func(t *testing.T) {
tab := newTestBuilder(t, 1)

atxInDb := &types.ActivationTx{TickCount: 100}
atxInDb.SetID(types.RandomATXID())
require.NoError(t, atxs.Add(tab.db, atxInDb))
tab.atxsdata.AddFromAtx(atxInDb, false)

prev := &types.ActivationTx{TickCount: 90}
prev.SetID(types.RandomATXID())

// no timeout set up
tab.mValidator.EXPECT().VerifyChain(gomock.Any(), atxInDb.ID(), tab.goldenATXID, gomock.Any())
found, err := tab.getPositioningAtx(context.Background(), types.EmptyNodeID, 99, prev)
require.NoError(t, err)
require.Equal(t, atxInDb.ID(), found)

tab.posAtxFinder.found = nil

// timeout set up, prev ATX exists
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Millisecond)
defer cancel()
time.Sleep(10 * time.Millisecond)
fasmat marked this conversation as resolved.
Show resolved Hide resolved

selected, err := tab.getPositioningAtx(ctx, types.EmptyNodeID, 99, prev)
require.NoError(t, err)
require.Equal(t, prev.ID(), selected)

tab.posAtxFinder.found = nil

// timeout set up, prev ATX do not exists
ctx, cancel = context.WithTimeout(context.Background(), 1*time.Millisecond)
defer cancel()
time.Sleep(10 * time.Millisecond)
fasmat marked this conversation as resolved.
Show resolved Hide resolved

selected, err = tab.getPositioningAtx(ctx, types.EmptyNodeID, 99, nil)
require.NoError(t, err)
require.Equal(t, tab.goldenATXID, selected)
})
}

func TestFindFullyValidHighTickAtx(t *testing.T) {
Expand Down
25 changes: 19 additions & 6 deletions activation/post_verifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ type verifyPostJob struct {
result chan error
}

type prioritizedVerifyCallKey int

const prioritizedVerifyCall prioritizedVerifyCallKey = 1

type postStatesGetter interface {
Get() map[types.NodeID]types.PostState
}
Expand Down Expand Up @@ -296,6 +300,8 @@ func (v *offloadingPostVerifier) scale(target int) {
}
}

// Verify creates a Job from given parameters, adds to jobs queue (prioritized or not)
// and waits for result of Job execution.
func (v *offloadingPostVerifier) Verify(
ctx context.Context,
p *shared.Proof,
Expand All @@ -314,13 +320,20 @@ func (v *offloadingPostVerifier) Verify(
defer metrics.PostVerificationQueue.Dec()

var jobChannel chan<- *verifyPostJob
_, prioritize := v.prioritizedIds[types.BytesToNodeID(m.NodeId)]
switch {
case prioritize:
v.log.Debug("prioritizing post verification", zap.Stringer("proof_node_id", types.BytesToNodeID(m.NodeId)))

if ctx.Value(prioritizedVerifyCall) == true {
v.log.Debug("prioritizing current post verification call")
jobChannel = v.prioritized
default:
jobChannel = v.jobs
} else {
_, prioritize := v.prioritizedIds[types.BytesToNodeID(m.NodeId)]
switch {
case prioritize:
v.log.Debug("prioritizing post verification by Node ID",
zap.Stringer("proof_node_id", types.BytesToNodeID(m.NodeId)))
jobChannel = v.prioritized
default:
jobChannel = v.jobs
}
}
fasmat marked this conversation as resolved.
Show resolved Hide resolved

select {
Expand Down
9 changes: 9 additions & 0 deletions activation/post_verifier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,15 @@ func TestPostVerifierPrioritization(t *testing.T) {
err := v.Verify(context.Background(), &shared.Proof{}, &shared.ProofMetadata{NodeId: nodeID.Bytes()})
require.NoError(t, err)

verifier.EXPECT().
Verify(context.WithValue(context.Background(), prioritizedVerifyCall, true),
gomock.Any(), &shared.ProofMetadata{NodeId: nodeID.Bytes()}, gomock.Any()).
Return(nil)

err = v.Verify(context.WithValue(context.Background(), prioritizedVerifyCall, true),
&shared.Proof{}, &shared.ProofMetadata{NodeId: nodeID.Bytes()})
require.NoError(t, err)

verifier.EXPECT().Close().Return(nil)
require.NoError(t, v.Close())
}
Expand Down
7 changes: 4 additions & 3 deletions activation/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func (v *Validator) NIPost(
}

func (v *Validator) PoetMembership(
ctx context.Context,
_ context.Context,
acud marked this conversation as resolved.
Show resolved Hide resolved
membership *types.MultiMerkleProof,
postChallenge types.Hash32,
poetChallenges [][]byte,
Expand Down Expand Up @@ -417,7 +417,7 @@ type atxDeps struct {
commitment types.ATXID
}

func (v *Validator) getAtxDeps(ctx context.Context, db sql.Executor, id types.ATXID) (*atxDeps, error) {
func (v *Validator) getAtxDeps(ctx context.Context, id types.ATXID) (*atxDeps, error) {
var blob sql.Blob
version, err := atxs.LoadBlob(ctx, v.db, id.Bytes(), &blob)
if err != nil {
Expand Down Expand Up @@ -525,10 +525,11 @@ func (v *Validator) verifyChainWithOpts(
}

// validate POST fully
deps, err := v.getAtxDeps(ctx, v.db, id)
deps, err := v.getAtxDeps(ctx, id)
if err != nil {
return fmt.Errorf("getting ATX dependencies: %w", err)
}

if err := v.Post(
ctx,
atx.SmesherID,
Expand Down
10 changes: 6 additions & 4 deletions config/mainnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,12 @@ func MainnetConfig() Config {
BeaconSyncWeightUnits: 800,
},
POET: activation.PoetConfig{
PhaseShift: 240 * time.Hour,
CycleGap: 12 * time.Hour,
GracePeriod: 1 * time.Hour,
RequestTimeout: 1100 * time.Second, // RequestRetryDelay * 2 * MaxRequestRetries*(MaxRequestRetries+1)/2
PhaseShift: 240 * time.Hour,
CycleGap: 12 * time.Hour,
GracePeriod: 1 * time.Hour,
PositioningATXSelectionTimeout: 50 * time.Minute,
// RequestRetryDelay * 2 * MaxRequestRetries*(MaxRequestRetries+1)/2
fasmat marked this conversation as resolved.
Show resolved Hide resolved
RequestTimeout: 1100 * time.Second,
RequestRetryDelay: 10 * time.Second,
MaxRequestRetries: 10,
},
Expand Down
3 changes: 2 additions & 1 deletion systest/parameters/bignet/smesher.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
"poet": {
"phase-shift": "300m",
"cycle-gap": "40m",
"grace-period": "10m"
"grace-period": "10m",
"positioning-atx-selection-timeout":"7m"
},
"tortoise": {
"tortoise-zdist": 4,
Expand Down
3 changes: 2 additions & 1 deletion systest/parameters/fastnet/smesher.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"poet": {
"phase-shift": "30s",
"cycle-gap": "30s",
"grace-period": "10s"
"grace-period": "10s",
"positioning-atx-selection-timeout":"7s"
},
"api": {
"grpc-public-listener": "0.0.0.0:9092",
Expand Down
3 changes: 2 additions & 1 deletion systest/parameters/longfast/smesher.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
"poet": {
"phase-shift": "10m",
"cycle-gap": "1m",
"grace-period": "5s"
"grace-period": "5s",
"positioning-atx-selection-timeout":"3s"
},
"tortoise": {
"tortoise-zdist": 4,
Expand Down
Loading