Skip to content

Commit

Permalink
Distributed POST verification (#5390)
Browse files Browse the repository at this point in the history
Part of #5185, closes #5376

- [x] Local randomness seed to select K3 indices using p2p peer ID
- [x] Verifying K3 indices of POST
- [x] Verifying ALL indices for initial POST
- [x] Publishing POST malfeasance proofs
- [x] Verifying only the invalid index in POST malfeasance proofs
- [x] Verify the candidate for a positioning ATX and its chain
- [x] Verify the candidate for a commitment ATX and its chain
- [x] Wait "some time" for malfeasance proofs before creating an active set
        ☝️ this is covered by ATX grading
- [x] configurable duration after which ATXs (and their chains) are considered valid (in terms of POST labels)
- [x] system tests
  • Loading branch information
poszu committed Feb 2, 2024
1 parent 5b27845 commit f9baccf
Show file tree
Hide file tree
Showing 48 changed files with 1,990 additions and 282 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,14 @@ configuration is as follows:
The node will automatically migrate the data from disk and store it in the database. The migration will take place at the
first startup after the upgrade.

* [#5390](https://github.com/spacemeshos/go-spacemesh/pull/5390)
Distributed PoST verification.

The nodes on the network can now choose to verify
only a subset of labels in PoST proofs by choosing a K3 value lower than K2.
If a node finds a proof invalid, it will report it to the network by
creating a malfeasance proof. The malicious node will then be blacklisted by the network.

### Features

### Improvements
Expand Down Expand Up @@ -346,7 +354,7 @@ for more information on how to configure the node to work with the PoST service.

### Improvements

* further increased cache sizes and and p2p timeouts to compensate for the increased number of nodes on the network.
* further increased cache sizes and p2p timeouts to compensate for the increased number of nodes on the network.

* [#5329](https://github.com/spacemeshos/go-spacemesh/pull/5329) P2P decentralization improvements. Added support for QUIC
transport and DHT routing discovery for finding peers and relays. Also, added the `ping-peers` feature which is useful
Expand Down
2 changes: 1 addition & 1 deletion Makefile-libs.Inc
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ else
endif
endif

POSTRS_SETUP_REV = 0.6.6
POSTRS_SETUP_REV = 0.7.0
POSTRS_SETUP_ZIP = libpost-$(platform)-v$(POSTRS_SETUP_REV).zip
POSTRS_SETUP_URL_ZIP ?= https://github.com/spacemeshos/post-rs/releases/download/v$(POSTRS_SETUP_REV)/$(POSTRS_SETUP_ZIP)

Expand Down
75 changes: 62 additions & 13 deletions activation/activation.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ const (
// Config defines configuration for Builder.
type Config struct {
// GoldenATXID is presumably the ID of the golden ATX that the Builder falls
// back to when no other positioning ATX exists — TODO confirm against NewBuilder.
GoldenATXID types.ATXID
LayersPerEpoch uint32
// RegossipInterval presumably controls how often Builder.Regossip is
// triggered — TODO confirm against the caller that schedules it.
RegossipInterval time.Duration
}

Expand Down Expand Up @@ -90,11 +89,18 @@ type Builder struct {
stop context.CancelFunc
poetCfg PoetConfig
poetRetryInterval time.Duration
// delay before PoST in ATX is considered valid (counting from the time it was received)
postValidityDelay time.Duration
}

// BuilderOption is a functional option for Builder: a function that receives
// the Builder being constructed and may modify it. NewBuilder invokes every
// supplied option after the defaults have been assigned.
type BuilderOption func(*Builder)

// WithPostValidityDelay returns a BuilderOption that sets the Builder's
// postValidityDelay, i.e. the delay before the PoST in an ATX is considered
// valid (counting from the time it was received).
func WithPostValidityDelay(delay time.Duration) BuilderOption {
	setDelay := func(builder *Builder) {
		builder.postValidityDelay = delay
	}
	return setDelay
}

// WithPoetRetryInterval modifies time that builder will have to wait before retrying ATX build process
// if it failed due to issues with PoET server.
func WithPoetRetryInterval(interval time.Duration) BuilderOption {
Expand Down Expand Up @@ -149,6 +155,7 @@ func NewBuilder(
syncer: syncer,
log: log,
poetRetryInterval: defaultPoetRetryInterval,
postValidityDelay: 12 * time.Hour,
}
for _, opt := range opts {
opt(b)
Expand Down Expand Up @@ -403,7 +410,7 @@ func (b *Builder) buildNIPostChallenge(ctx context.Context) (*types.NIPostChalle
}
}

posAtx, err := b.GetPositioningAtx()
posAtx, err := b.getPositioningAtx(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get positioning ATX: %w", err)
}
Expand Down Expand Up @@ -584,17 +591,24 @@ func (b *Builder) broadcast(ctx context.Context, atx *types.ActivationTx) (int,
return len(buf), nil
}

// GetPositioningAtx returns atx id with the highest tick height.
func (b *Builder) GetPositioningAtx() (types.ATXID, error) {
id, err := atxs.GetIDWithMaxHeight(b.cdb, b.signer.NodeID())
if err != nil {
if errors.Is(err, sql.ErrNotFound) {
b.log.Info("using golden atx as positioning atx")
return b.goldenATXID, nil
}
return types.ATXID{}, fmt.Errorf("cannot find pos atx: %w", err)
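// getPositioningAtx returns the ID of the ATX with the highest tick height whose chain
// passes verification, falling back to the golden ATX when no such ATX is found.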
func (b *Builder) getPositioningAtx(ctx context.Context) (types.ATXID, error) {
id, err := findFullyValidHighTickAtx(
ctx,
b.cdb,
b.signer.NodeID(),
b.goldenATXID,
b.validator,
b.log,
VerifyChainOpts.AssumeValidBefore(time.Now().Add(-b.postValidityDelay)),
VerifyChainOpts.WithTrustedID(b.signer.NodeID()),
VerifyChainOpts.WithLogger(b.log),
)
if errors.Is(err, sql.ErrNotFound) {
b.log.Info("using golden atx as positioning atx")
return b.goldenATXID, nil
}
return id, nil
return id, err
}

func (b *Builder) Regossip(ctx context.Context) error {
Expand Down Expand Up @@ -630,3 +644,38 @@ func buildNipostChallengeStartDeadline(roundStart time.Time, gracePeriod time.Du
jitter := randomDurationInRange(time.Duration(0), gracePeriod*maxNipostChallengeBuildJitter/100.0)
return roundStart.Add(jitter).Add(-gracePeriod)
}

// findFullyValidHighTickAtx looks up the max-height ATX whose chain passes
// validator.VerifyChain.
//
// Candidates are fetched one at a time with atxs.GetIDWithMaxHeight (db and
// prefNodeID are forwarded to that lookup). A candidate that fails verification
// is logged, remembered, and filtered out of every subsequent lookup.
//
// The search stops when:
//   - a candidate verifies: its ID is returned with a nil error;
//   - the lookup itself fails: a zero ATXID and the lookup error are returned;
//   - ctx is done: a zero ATXID and ctx.Err() are returned.
func findFullyValidHighTickAtx(
	ctx context.Context,
	db sql.Executor,
	prefNodeID types.NodeID,
	goldenATXID types.ATXID,
	validator nipostValidator,
	log *zap.Logger,
	opts ...VerifyChainOption,
) (types.ATXID, error) {
	// Set of candidates that already failed chain verification.
	rejected := make(map[types.ATXID]struct{})
	notRejected := func(candidate types.ATXID) bool {
		_, seen := rejected[candidate]
		return !seen
	}

	for {
		// Give up as soon as the caller's context is cancelled or expired.
		if err := ctx.Err(); err != nil {
			return types.ATXID{}, err
		}

		id, err := atxs.GetIDWithMaxHeight(db, prefNodeID, notRejected)
		if err != nil {
			return types.ATXID{}, err
		}

		verifyErr := validator.VerifyChain(ctx, id, goldenATXID, opts...)
		if verifyErr == nil {
			return id, nil
		}
		log.Info("rejecting candidate for high-tick atx", zap.Error(verifyErr), zap.Stringer("atx_id", id))
		// Exclude this candidate so the next lookup yields a different one.
		rejected[id] = struct{}{}
	}
}
Loading

0 comments on commit f9baccf

Please sign in to comment.