Skip to content

Commit

Permalink
swarm/network: WIP consider all nodes for healthy iteration (#19155)
Browse files Browse the repository at this point in the history
* swarm/network: WIP consider all nodes for healthy iteration

* swarm/network/simulation: extend TestWaitTillHealthy to really check kads are healthy

* cmd/swarm/swarm-snapshot: fixed bugs in snapshot creation binary

* swarm/network/simulation: addressed PR comments

* swarm/network/simulation: defer sim.Close()

* swarm/network/simulation: fixed wrong sim.Close()

* swarm/network/simulation: addressed PR comments

* cmd/swarm/swarm-snapshot: reducing default to 8 nodes, more to 4

* cmd/swarm/swarm-snapshot: extended timeout to 3 mins, or 256 nodes snapshot times out

* swarm/network/simulation: More PR comments
  • Loading branch information
nolash authored and zelig committed Feb 28, 2019
1 parent 505a49e commit 62d9d63
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 29 deletions.
11 changes: 7 additions & 4 deletions cmd/swarm/swarm-snapshot/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,16 @@ func createSnapshot(filename string, nodes int, services []string) (err error) {
log.Debug("create snapshot", "filename", filename, "nodes", nodes, "services", services)

sim := simulation.New(map[string]simulation.ServiceFunc{
"bzz": func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
"bzz": func(ctx *adapters.ServiceContext, bucket *sync.Map) (node.Service, func(), error) {
addr := network.NewAddr(ctx.Config.Node())
kad := network.NewKademlia(addr.Over(), network.NewKadParams())
hp := network.NewHiveParams()
hp.KeepAliveInterval = time.Duration(200) * time.Millisecond
hp.Discovery = true // discovery must be enabled when creating a snapshot

// store the kademlia in the bucket, needed later in the WaitTillHealthy function
bucket.Store(simulation.BucketKeyKademlia, kad)

config := &network.BzzConfig{
OverlayAddr: addr.Over(),
UnderlayAddr: addr.Under(),
Expand All @@ -76,17 +79,17 @@ func createSnapshot(filename string, nodes int, services []string) (err error) {
})
defer sim.Close()

_, err = sim.AddNodes(nodes)
ids, err := sim.AddNodes(nodes)
if err != nil {
return fmt.Errorf("add nodes: %v", err)
}

err = sim.Net.ConnectNodesRing(nil)
err = sim.Net.ConnectNodesRing(ids)
if err != nil {
return fmt.Errorf("connect nodes: %v", err)
}

ctx, cancelSimRun := context.WithTimeout(context.Background(), 2*time.Minute)
ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute)
defer cancelSimRun()
if _, err := sim.WaitTillHealthy(ctx); err != nil {
return fmt.Errorf("wait for healthy kademlia: %v", err)
Expand Down
4 changes: 2 additions & 2 deletions cmd/swarm/swarm-snapshot/create_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func TestSnapshotCreate(t *testing.T) {
},
{
name: "more nodes",
nodes: defaultNodes + 5,
nodes: defaultNodes + 4,
},
{
name: "services",
Expand Down Expand Up @@ -81,7 +81,7 @@ func TestSnapshotCreate(t *testing.T) {
}
testCmd := runSnapshot(t, append(args, file.Name())...)

testCmd.ExpectExit()
testCmd.WaitExit()
if code := testCmd.ExitStatus(); code != 0 {
t.Fatalf("command exit code %v, expected 0", code)
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/swarm/swarm-snapshot/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
var gitCommit string // Git SHA1 commit hash of the release (set via linker flags)

// default value for "create" command --nodes flag
const defaultNodes = 10
const defaultNodes = 8

func main() {
err := newApp().Run(os.Args)
Expand Down
5 changes: 3 additions & 2 deletions swarm/network/simulation/kademlia.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
for k := range ill {
delete(ill, k)
}
log.Debug("kademlia health check", "addr count", len(addrs))
log.Debug("kademlia health check", "addr count", len(addrs), "kad len", len(kademlias))
for id, k := range kademlias {
//PeerPot for this node
addr := common.Bytes2Hex(k.BaseAddr())
Expand All @@ -70,7 +70,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN)
log.Debug("kademlia", "health", h.ConnectNN && h.KnowNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
log.Debug("kademlia", "ill condition", !h.ConnectNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
if !h.ConnectNN {
if !h.Healthy() {
ill[id] = k
}
}
Expand All @@ -85,6 +85,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
// in simulation bucket.
func (s *Simulation) kademlias() (ks map[enode.ID]*network.Kademlia) {
items := s.UpNodesItems(BucketKeyKademlia)
log.Debug("kademlia len items", "len", len(items))
ks = make(map[enode.ID]*network.Kademlia, len(items))
for id, v := range items {
k, ok := v.(*network.Kademlia)
Expand Down
120 changes: 100 additions & 20 deletions swarm/network/simulation/kademlia_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,45 +22,125 @@ import (
"testing"
"time"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/node"
"github.com/ethereum/go-ethereum/p2p/simulations/adapters"
"github.com/ethereum/go-ethereum/swarm/network"
)

/*
TestWaitTillHealthy tests that we indeed get a healthy network after we wait for it.
Testing this requires a somewhat circular procedure:
* First we create an initial simulation
* Run it as nodes connected in a ring
* Wait until the network is healthy
* Then we create a snapshot
* With this snapshot we create a new simulation
* This simulation is expected to have a healthy configuration, as it uses the snapshot
* Thus we just iterate all nodes and check that their kademlias are healthy
* If all kademlias are healthy, the test succeeded, otherwise it failed
*/
func TestWaitTillHealthy(t *testing.T) {
sim := New(map[string]ServiceFunc{
"bzz": func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
addr := network.NewAddr(ctx.Config.Node())
hp := network.NewHiveParams()
config := &network.BzzConfig{
OverlayAddr: addr.Over(),
UnderlayAddr: addr.Under(),
HiveParams: hp,
}
kad := network.NewKademlia(addr.Over(), network.NewKadParams())
// store kademlia in node's bucket under BucketKeyKademlia
// so that it can be found by WaitTillHealthy method.
b.Store(BucketKeyKademlia, kad)
return network.NewBzz(config, kad, nil, nil, nil), nil, nil
},
})
defer sim.Close()

_, err := sim.AddNodesAndConnectRing(10)
testNodesNum := 10

// create the first simulation
sim := New(createSimServiceMap(true))

// connect and...
nodeIDs, err := sim.AddNodesAndConnectRing(testNodesNum)
if err != nil {
t.Fatal(err)
}

// array of all overlay addresses
var addrs [][]byte
// iterate once to be able to build the peer map
for _, node := range nodeIDs {
//get the kademlia overlay address from this ID
a := node.Bytes()
//append it to the array of all overlay addresses
addrs = append(addrs, a)
}
// build a PeerPot only once
pp := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)

ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel()

// ...wait until healthy
ill, err := sim.WaitTillHealthy(ctx)
if err != nil {
for id, kad := range ill {
t.Log("Node", id)
t.Log(kad.String())
}
if err != nil {
t.Fatal(err)
t.Fatal(err)
}

// now create a snapshot of this network
snap, err := sim.Net.Snapshot()
if err != nil {
t.Fatal(err)
}

// close the initial simulation
sim.Close()
// create a control simulation
controlSim := New(createSimServiceMap(false))
defer controlSim.Close()

// load the snapshot into this control simulation
err = controlSim.Net.Load(snap)
if err != nil {
t.Fatal(err)
}
_, err = controlSim.WaitTillHealthy(ctx)
if err != nil {
t.Fatal(err)
}

for _, node := range nodeIDs {
// ...get its kademlia
item, ok := controlSim.NodeItem(node, BucketKeyKademlia)
if !ok {
t.Fatal("No kademlia bucket item")
}
kad := item.(*network.Kademlia)
// get its base address
kid := common.Bytes2Hex(kad.BaseAddr())

//get the health info
info := kad.GetHealthInfo(pp[kid])
log.Trace("Health info", "info", info)
// check that it is healthy
healthy := info.Healthy()
if !healthy {
t.Fatalf("Expected node %v of control simulation to be healthy, but it is not, unhealthy kademlias: %v", node, kad.String())
}
}
}

// createSimServiceMap builds the services map used to construct a simulation.
// The map holds a single "bzz" service; the discovery flag is copied into the
// hive parameters of every node created from it, switching hive discovery
// on or off.
func createSimServiceMap(discovery bool) map[string]ServiceFunc {
	bzzService := func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
		nodeAddr := network.NewAddr(ctx.Config.Node())

		hiveParams := network.NewHiveParams()
		hiveParams.Discovery = discovery

		bzzConfig := &network.BzzConfig{
			OverlayAddr:  nodeAddr.Over(),
			UnderlayAddr: nodeAddr.Under(),
			HiveParams:   hiveParams,
		}

		kademlia := network.NewKademlia(nodeAddr.Over(), network.NewKadParams())
		// Publish the kademlia in the node's bucket under BucketKeyKademlia
		// so that the WaitTillHealthy method can find it.
		b.Store(BucketKeyKademlia, kademlia)

		// No cleanup function is needed for this service.
		return network.NewBzz(bzzConfig, kademlia, nil, nil, nil), nil, nil
	}

	return map[string]ServiceFunc{
		"bzz": bzzService,
	}
}

0 comments on commit 62d9d63

Please sign in to comment.