From 2af24724dd5f3ab1994001854eb32c6a19f9f64a Mon Sep 17 00:00:00 2001 From: holisticode Date: Thu, 14 Feb 2019 13:01:50 -0500 Subject: [PATCH] swarm/network: Saturation check for healthy networks (#19071) * swarm/network: new saturation for implementation * swarm/network: re-added saturation func in Kademlia as it is used elsewhere * swarm/network: saturation with higher MinBinSize * swarm/network: PeersPerBin with depth check * swarm/network: edited tests to pass new saturated check * swarm/network: minor fix saturated check * swarm/network/simulations/discovery: fixed renamed RPC call * swarm/network: renamed to isSaturated and returns bool * swarm/network: early depth check --- swarm/network/kademlia.go | 74 ++++++++-- swarm/network/kademlia_test.go | 126 +++++++++++++++--- swarm/network/simulation/kademlia.go | 2 +- .../simulations/discovery/discovery_test.go | 6 +- 4 files changed, 179 insertions(+), 29 deletions(-) diff --git a/swarm/network/kademlia.go b/swarm/network/kademlia.go index 1193e3b652e4..146f39106769 100644 --- a/swarm/network/kademlia.go +++ b/swarm/network/kademlia.go @@ -628,7 +628,8 @@ func (k *Kademlia) string() string { // used for testing only // TODO move to separate testing tools file type PeerPot struct { - NNSet [][]byte + NNSet [][]byte + PeersPerBin []int } // NewPeerPotMap creates a map of pot record of *BzzAddr with keys @@ -654,6 +655,7 @@ func NewPeerPotMap(neighbourhoodSize int, addrs [][]byte) map[string]*PeerPot { // all nn-peers var nns [][]byte + peersPerBin := make([]int, depth) // iterate through the neighbours, going from the deepest to the shallowest np.EachNeighbour(a, Pof, func(val pot.Val, po int) bool { @@ -667,14 +669,18 @@ func NewPeerPotMap(neighbourhoodSize int, addrs [][]byte) map[string]*PeerPot { // a neighbor is any peer in or deeper than the depth if po >= depth { nns = append(nns, addr) - return true + } else { + // for peers < depth, we just count the number in each bin + // the bin is the index of the 
slice + peersPerBin[po]++ } - return false + return true }) - log.Trace(fmt.Sprintf("%x PeerPotMap NNS: %s", addrs[i][:4], LogAddrs(nns))) + log.Trace(fmt.Sprintf("%x PeerPotMap NNS: %s, peersPerBin", addrs[i][:4], LogAddrs(nns))) ppmap[common.Bytes2Hex(a)] = &PeerPot{ - NNSet: nns, + NNSet: nns, + PeersPerBin: peersPerBin, } } return ppmap @@ -698,6 +704,39 @@ func (k *Kademlia) saturation() int { return prev } +// isSaturated returns true if the kademlia is considered saturated, or false if not. +// It checks this by checking an array of ints called unsaturatedBins; each item in that array corresponds +// to the bin which is unsaturated (number of connections < k.MinBinSize). +// The bin is considered unsaturated only if there are actual peers in that PeerPot's bin (peersPerBin) +// (if there is no peer for a given bin, then no connection could ever be established; +// in a God's view this is relevant as no more peers will ever appear on that bin) +func (k *Kademlia) isSaturated(peersPerBin []int, depth int) bool { + // depth could be calculated from k but as this is called from `GetHealthInfo()`, + // the depth has already been calculated so we can require it as a parameter + + // early check for depth + if depth != len(peersPerBin) { + return false + } + unsaturatedBins := make([]int, 0) + k.conns.EachBin(k.base, Pof, 0, func(po, size int, f func(func(val pot.Val) bool) bool) bool { + + if po >= depth { + return false + } + log.Trace("peers per bin", "peersPerBin[po]", peersPerBin[po], "po", po) + // if there are actually peers in the PeerPot who can fulfill k.MinBinSize + if size < k.MinBinSize && size < peersPerBin[po] { + log.Trace("connections for po", "po", po, "size", size) + unsaturatedBins = append(unsaturatedBins, po) + } + return true + }) + + log.Trace("list of unsaturated bins", "unsaturatedBins", unsaturatedBins) + return len(unsaturatedBins) == 0 +} + // knowNeighbours tests if all neighbours in the peerpot // are found among the peers known to 
the kademlia // It is used in Healthy function for testing only @@ -780,11 +819,13 @@ type Health struct { ConnectNN bool // whether node is connected to all its neighbours CountConnectNN int // amount of neighbours connected to MissingConnectNN [][]byte // which neighbours we should have been connected to but we're not - Saturated bool // whether we are connected to all the peers we would have liked to - Hive string + // Saturated: whether every bin < depth has at least MinBinSize connections or, + // if it has fewer, as many connections as there are available peers in that bin + Saturated bool + Hive string } -// Healthy reports the health state of the kademlia connectivity +// GetHealthInfo reports the health state of the kademlia connectivity // // The PeerPot argument provides an all-knowing view of the network // The resulting Health object is a result of comparisons between @@ -792,7 +833,7 @@ type Health struct { // what SHOULD it have been when we take all we know about the network into consideration. 
// // used for testing only -func (k *Kademlia) Healthy(pp *PeerPot) *Health { +func (k *Kademlia) GetHealthInfo(pp *PeerPot) *Health { k.lock.RLock() defer k.lock.RUnlock() if len(pp.NNSet) < k.NeighbourhoodSize { @@ -801,7 +842,10 @@ func (k *Kademlia) Healthy(pp *PeerPot) *Health { gotnn, countgotnn, culpritsgotnn := k.connectedNeighbours(pp.NNSet) knownn, countknownn, culpritsknownn := k.knowNeighbours(pp.NNSet) depth := depthForPot(k.conns, k.NeighbourhoodSize, k.base) - saturated := k.saturation() < depth + + // check saturation + saturated := k.isSaturated(pp.PeersPerBin, depth) + log.Trace(fmt.Sprintf("%08x: healthy: knowNNs: %v, gotNNs: %v, saturated: %v\n", k.base, knownn, gotnn, saturated)) return &Health{ KnowNN: knownn, @@ -814,3 +858,13 @@ func (k *Kademlia) Healthy(pp *PeerPot) *Health { Hive: k.string(), } } + +// Healthy returns the strict interpretation of `Healthy` given a `Health` struct +// definition of strict health: all conditions must be true: +// - we at least know one peer +// - we know all neighbors +// - we are connected to all known neighbors +// - it is saturated +func (h *Health) Healthy() bool { + return h.KnowNN && h.ConnectNN && h.CountKnowNN > 0 && h.Saturated +} diff --git a/swarm/network/kademlia_test.go b/swarm/network/kademlia_test.go index 8a724756b646..b4663eee5ef8 100644 --- a/swarm/network/kademlia_test.go +++ b/swarm/network/kademlia_test.go @@ -168,6 +168,46 @@ func TestNeighbourhoodDepth(t *testing.T) { testNum++ } +// TestHighMinBinSize tests that the saturation function also works +// if MinBinSize is > 2, the connection count is < k.MinBinSize +// and there are more peers available than connected +func TestHighMinBinSize(t *testing.T) { + // a function to test for different MinBinSize values + testKad := func(minBinSize int) { + // create a test kademlia + tk := newTestKademlia(t, "11111111") + // set its MinBinSize to desired value + tk.KadParams.MinBinSize = minBinSize + + // add a couple of peers (so we have NN 
and depth) + tk.On("00000000") // bin 0 + tk.On("11100000") // bin 3 + tk.On("11110000") // bin 4 + + first := "10000000" // add a first peer at bin 1 + tk.Register(first) // register it + // we now have one registered peer at bin 1; + // iterate and connect one peer at each iteration; + // should be unhealthy until at minBinSize - 1 + // we connect the unconnected but registered peer + for i := 1; i < minBinSize; i++ { + peer := fmt.Sprintf("1000%b", 8|i) + tk.On(peer) + if i == minBinSize-1 { + tk.On(first) + tk.checkHealth(true) + return + } + tk.checkHealth(false) + } + } + // test MinBinSizes of 3 to 5 + testMinBinSizes := []int{3, 4, 5} + for _, k := range testMinBinSizes { + testKad(k) + } +} + // TestHealthStrict tests the simplest definition of health // Which means whether we are connected to all neighbors we know of func TestHealthStrict(t *testing.T) { @@ -176,60 +216,116 @@ func TestHealthStrict(t *testing.T) { // no peers // unhealthy (and lonely) tk := newTestKademlia(t, "11111111") - tk.checkHealth(false, false) + tk.checkHealth(false) // know one peer but not connected // unhealthy tk.Register("11100000") - tk.checkHealth(false, false) + tk.checkHealth(false) // know one peer and connected - // healthy + // healthy: the single known peer is connected tk.On("11100000") - tk.checkHealth(true, false) + tk.checkHealth(true) // know two peers, only one connected // unhealthy tk.Register("11111100") - tk.checkHealth(false, false) + tk.checkHealth(false) // know two peers and connected to both // healthy tk.On("11111100") - tk.checkHealth(true, false) + tk.checkHealth(true) // know three peers, connected to the two deepest // healthy tk.Register("00000000") - tk.checkHealth(true, false) + tk.checkHealth(false) // know three peers, connected to all three // healthy tk.On("00000000") - tk.checkHealth(true, false) + tk.checkHealth(true) // add fourth peer deeper than current depth // unhealthy tk.Register("11110000") - tk.checkHealth(false, false) + tk.checkHealth(false) // 
connected to three deepest peers // healthy tk.On("11110000") - tk.checkHealth(true, false) + tk.checkHealth(true) // add additional peer in same bin as deepest peer // unhealthy tk.Register("11111101") - tk.checkHealth(false, false) + tk.checkHealth(false) // four deepest of five peers connected // healthy tk.On("11111101") - tk.checkHealth(true, false) + tk.checkHealth(true) + + // add additional peer in bin 0 + // unhealthy: unsaturated bin 0, 2 known but 1 connected + tk.Register("00000001") + tk.checkHealth(false) + + // Connect second in bin 0 + // healthy + tk.On("00000001") + tk.checkHealth(true) + + // add peer in bin 1 + // unhealthy, as it is known but not connected + tk.Register("10000000") + tk.checkHealth(false) + + // connect peer in bin 1 + // depth change, is now 1 + // healthy, 1 peer in bin 1 known and connected + tk.On("10000000") + tk.checkHealth(true) + + // add second peer in bin 1 + // unhealthy, as it is known but not connected + tk.Register("10000001") + tk.checkHealth(false) + + // connect second peer in bin 1 + // healthy, + tk.On("10000001") + tk.checkHealth(true) + + // connect third peer in bin 1 + // healthy, + tk.On("10000011") + tk.checkHealth(true) + + // add peer in bin 2 + // unhealthy, no depth change + tk.Register("11000000") + tk.checkHealth(false) + + // connect peer in bin 2 + // depth change - as we already have peers in bin 3 and 4, + // we have contiguous bins, no bin < po 5 is empty -> depth 5 + // healthy, every bin < depth has the max available peers, + // even if they are < MinBinSize + tk.On("11000000") + tk.checkHealth(true) + + // add peer in bin 2 + // unhealthy, peer bin is below depth 5 but + // has more available peers (2) than connected ones (1) + // --> unsaturated + tk.Register("11000011") + tk.checkHealth(false) } -func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) { +func (tk *testKademlia) checkHealth(expectHealthy bool) { tk.t.Helper() kid := common.Bytes2Hex(tk.BaseAddr()) 
addrs := [][]byte{tk.BaseAddr()} @@ -239,13 +335,13 @@ func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) { }) pp := NewPeerPotMap(tk.NeighbourhoodSize, addrs) - healthParams := tk.Healthy(pp[kid]) + healthParams := tk.GetHealthInfo(pp[kid]) // definition of health, all conditions but be true: // - we at least know one peer // - we know all neighbors // - we are connected to all known neighbors - health := healthParams.KnowNN && healthParams.ConnectNN && healthParams.CountKnowNN > 0 + health := healthParams.Healthy() if expectHealthy != health { tk.t.Fatalf("expected kademlia health %v, is %v\n%v", expectHealthy, health, tk.String()) } diff --git a/swarm/network/simulation/kademlia.go b/swarm/network/simulation/kademlia.go index 6d8d0e0a2cd5..c58d402b0942 100644 --- a/swarm/network/simulation/kademlia.go +++ b/swarm/network/simulation/kademlia.go @@ -64,7 +64,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net addr := common.Bytes2Hex(k.BaseAddr()) pp := ppmap[addr] //call Healthy RPC - h := k.Healthy(pp) + h := k.GetHealthInfo(pp) //print info log.Debug(k.String()) log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN) diff --git a/swarm/network/simulations/discovery/discovery_test.go b/swarm/network/simulations/discovery/discovery_test.go index 86aa6f6ddd54..5227de3bb584 100644 --- a/swarm/network/simulations/discovery/discovery_test.go +++ b/swarm/network/simulations/discovery/discovery_test.go @@ -267,7 +267,7 @@ func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simul } healthy := &network.Health{} - if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { + if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { return false, fmt.Errorf("error getting node health: %s", err) } log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest 
neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive)) @@ -352,7 +352,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt healthy := &network.Health{} addr := id.String() ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs) - if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { + if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { return fmt.Errorf("error getting node health: %s", err) } @@ -422,7 +422,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt healthy := &network.Health{} ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs) - if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { + if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { return false, fmt.Errorf("error getting node health: %s", err) } log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN))