Skip to content

Commit

Permalink
fix: retain knowledge of previous bootstraps in the bootstrap transport
Browse files Browse the repository at this point in the history
  • Loading branch information
tinyzimmer committed Nov 5, 2023
1 parent e1835e9 commit ea451f7
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 6 deletions.
4 changes: 4 additions & 0 deletions pkg/cmd/daemoncmd/connmgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"path/filepath"
"runtime"
"sync"
"time"

v1 "github.com/webmeshproj/api/go/v1"
"google.golang.org/grpc/codes"
Expand Down Expand Up @@ -294,6 +295,9 @@ func (m *ConnManager) buildConnConfig(ctx context.Context, req *v1.ConnectReques
default:
conf.Bootstrap.DefaultNetworkPolicy = string(firewall.PolicyAccept)
}
// We only support single node bootstrap for now, so set the initial leader election
// timeout to a very low value.
conf.Bootstrap.ElectionTimeout = time.Millisecond * 500
}
conf.TLS.Insecure = !req.GetTls().GetEnabled()
if !conf.TLS.Insecure {
Expand Down
7 changes: 7 additions & 0 deletions pkg/config/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"fmt"
"net"
"net/netip"
"path/filepath"
"time"

"github.com/spf13/pflag"
Expand Down Expand Up @@ -217,6 +218,12 @@ func (o *Config) NewBootstrapTransport(ctx context.Context, nodeID string, conn
Timeout: t.TCPConnectTimeout,
ElectionTimeout: o.Bootstrap.ElectionTimeout,
Credentials: conn.Credentials(),
DataDirectory: func() string {
if o.Storage.InMemory {
return ""
}
return filepath.Join(o.Storage.Path, "bootstrap")
}(),
Peers: func() map[string]tcp.BootstrapPeer {
if t.TCPServers == nil {
return nil
Expand Down
32 changes: 28 additions & 4 deletions pkg/meshnet/transport/tcp/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package tcp

import (
"fmt"
"io"
"log/slog"
"time"
Expand All @@ -29,6 +30,7 @@ import (
"github.com/webmeshproj/webmesh/pkg/meshnet/netutil"
"github.com/webmeshproj/webmesh/pkg/meshnet/transport"
"github.com/webmeshproj/webmesh/pkg/storage/errors"
"github.com/webmeshproj/webmesh/pkg/storage/providers/backends/badgerdb"
)

// BootstrapTransportOptions are options for the TCP transport.
Expand All @@ -49,6 +51,10 @@ type BootstrapTransportOptions struct {
ElectionTimeout time.Duration
// Credentials are the credentials to use when dialing peers.
Credentials []grpc.DialOption
// DataDirectory is the directory to use for the bootstrap transport.
// This is where the results of an initial bootstrap are stored. If not provided,
// in-memory stores are used instead.
DataDirectory string
}

// BootstrapPeer is a TCP bootstrap peer.
Expand Down Expand Up @@ -82,7 +88,7 @@ func (t *bootstrapTransport) LeaderElect(ctx context.Context) (isLeader bool, rt
Timeout: t.Timeout,
})
if err != nil {
return false, nil, err
return false, nil, fmt.Errorf("new raft transport: %w", err)
}
defer raftTransport.Close()

Expand All @@ -102,7 +108,7 @@ func (t *bootstrapTransport) LeaderElect(ctx context.Context) (isLeader bool, rt
// Resolve our advertise address
addr, err := netutil.ResolveTCPAddr(ctx, t.Advertise, 15)
if err != nil {
return false, nil, err
return false, nil, fmt.Errorf("resolve advertise address: %w", err)
}

// Build the bootstrap configuration
Expand All @@ -119,7 +125,7 @@ func (t *bootstrapTransport) LeaderElect(ctx context.Context) (isLeader bool, rt
// Resolve the peer address
addr, err := netutil.ResolveTCPAddr(ctx, peer.AdvertiseAddr, 15)
if err != nil {
return false, nil, err
return false, nil, fmt.Errorf("resolve peer advertise address: %w", err)
}
// Append the peer to the configuration
bootstrapConfig.Servers = append(bootstrapConfig.Servers, raft.Server{
Expand All @@ -129,7 +135,22 @@ func (t *bootstrapTransport) LeaderElect(ctx context.Context) (isLeader bool, rt
})
}
log.Debug("Starting bootstrap transport raft instance", slog.String("local-id", string(rftOpts.LocalID)), slog.Any("config", bootstrapConfig))
rft, err := raft.NewRaft(rftOpts, &raft.MockFSM{}, raft.NewInmemStore(), raft.NewInmemStore(), raft.NewInmemSnapshotStore(), raftTransport)
var logStore raft.LogStore
var stableStore raft.StableStore
if t.DataDirectory == "" {
logStore = raft.NewInmemStore()
stableStore = raft.NewInmemStore()
} else {
db, err := badgerdb.NewInMemory(badgerdb.Options{
DiskPath: t.DataDirectory,
})
if err != nil {
return false, nil, err
}
logStore = db
stableStore = db
}
rft, err := raft.NewRaft(rftOpts, &raft.MockFSM{}, logStore, stableStore, raft.NewInmemSnapshotStore(), raftTransport)
if err != nil {
return false, nil, err
}
Expand All @@ -140,6 +161,9 @@ func (t *bootstrapTransport) LeaderElect(ctx context.Context) (isLeader bool, rt
if err == raft.ErrCantBootstrap {
// The cluster was already bootstrapped (basically we took too long to get there)
log.Debug("Bootstrap transport cluster already bootstrapped")
if len(t.Peers) == 0 {
return false, nil, errors.ErrAlreadyBootstrapped
}
// Build a transport that tries to join the other peers
var opts RoundTripOptions
for _, peer := range t.Peers {
Expand Down
8 changes: 6 additions & 2 deletions pkg/meshnode/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,12 @@ func (s *meshStore) bootstrap(ctx context.Context, opts ConnectOptions) error {
s.log.Debug("Cluster not yet bootstrapped, attempting to bootstrap")
isLeader, joinRT, err := opts.Bootstrap.Transport.LeaderElect(ctx)
if err != nil {
if errors.IsAlreadyBootstrapped(err) && joinRT != nil {
s.log.Info("cluster already bootstrapped, attempting to rejoin as voter")
if errors.IsAlreadyBootstrapped(err) {
if joinRT == nil {
s.log.Info("Cluster already bootstrapped, but we are the only server in the configuration. Recovering from storage.")
return s.recoverWireguard(ctx)
}
s.log.Info("Cluster already bootstrapped, attempting to rejoin as voter")
opts.JoinRoundTripper = joinRT
return s.join(ctx, opts)
}
Expand Down

0 comments on commit ea451f7

Please sign in to comment.