From 01707429e75ad5588b874cf33cb888f0476a6659 Mon Sep 17 00:00:00 2001 From: Alvaro Aleman Date: Fri, 29 Nov 2024 21:38:41 -0500 Subject: [PATCH 1/3] warning: Use leader elector with client timeout This change makes the leader elector use a client that internally has a smaller timeout than the renew deadline, which avoids a situation where a single request timing out makes us lose the leader lease. --- pkg/leaderelection/leader_election.go | 24 +++++++++--------------- pkg/manager/manager.go | 1 + pkg/manager/manager_test.go | 22 ++++++++++++++++++++++ 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/pkg/leaderelection/leader_election.go b/pkg/leaderelection/leader_election.go index ee4fcf4cbe..5a41f5c747 100644 --- a/pkg/leaderelection/leader_election.go +++ b/pkg/leaderelection/leader_election.go @@ -20,10 +20,9 @@ import ( "errors" "fmt" "os" + "time" "k8s.io/apimachinery/pkg/util/uuid" - coordinationv1client "k8s.io/client-go/kubernetes/typed/coordination/v1" - corev1client "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/rest" "k8s.io/client-go/tools/leaderelection/resourcelock" @@ -49,6 +48,9 @@ type Options struct { // LeaderElectionID determines the name of the resource that leader election // will use for holding the leader lock. LeaderElectionID string + + // RewnewDeadline is the renew deadline for this leader election client + RewnewDeadline time.Duration } // NewResourceLock creates a new resource lock for use in a leader election loop. @@ -88,25 +90,17 @@ func NewResourceLock(config *rest.Config, recorderProvider recorder.Provider, op // Construct clients for leader election rest.AddUserAgent(config, "leader-election") - corev1Client, err := corev1client.NewForConfig(config) - if err != nil { - return nil, err - } - - coordinationClient, err := coordinationv1client.NewForConfig(config) - if err != nil { - return nil, err - } - return resourcelock.New(options.LeaderElectionResourceLock, + return resourcelock.NewFromKubeconfig(options.LeaderElectionResourceLock, options.LeaderElectionNamespace, options.LeaderElectionID, - corev1Client, - coordinationClient, resourcelock.ResourceLockConfig{ Identity: id, EventRecorder: recorderProvider.GetEventRecorderFor(id), - }) + }, + config, + options.RewnewDeadline, + ) } func getInClusterNamespace() (string, error) { diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 3166f4818f..8e5d3bc8a7 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -389,6 +389,7 @@ func New(config *rest.Config, options Options) (Manager, error) { LeaderElectionResourceLock: options.LeaderElectionResourceLock, LeaderElectionID: options.LeaderElectionID, LeaderElectionNamespace: options.LeaderElectionNamespace, + RewnewDeadline: *options.RenewDeadline, }) if err != nil { return nil, err diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index c42d2f2ae7..792bc4f967 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -317,6 +317,28 @@ var _ = Describe("manger.Manager", func() { <-m2done }) + It("should default RenewDeadline for leader election config", func() { + var rl resourcelock.Interface + m1, err := New(cfg, Options{ + LeaderElection: true, + LeaderElectionNamespace: "default", + LeaderElectionID: "test-leader-election-id", + newResourceLock: func(config *rest.Config, recorderProvider recorder.Provider, options leaderelection.Options) (resourcelock.Interface, error) { + if options.RewnewDeadline != 10*time.Second { + return nil, fmt.Errorf("expected RenewDeadline to be 10s, got %v", options.RewnewDeadline) + } + var err error + rl, err = leaderelection.NewResourceLock(config, recorderProvider, options) + return rl, err + }, + HealthProbeBindAddress: "0", + Metrics: metricsserver.Options{BindAddress: "0"}, + PprofBindAddress: "0", + }) + Expect(err).ToNot(HaveOccurred()) + Expect(m1).ToNot(BeNil()) + }) + It("should default ID to controller-runtime if ID is not set", func() { var rl resourcelock.Interface m1, err := New(cfg, Options{ From 4bc3811a419e7564daea4baaabc1bae9d4c011df Mon Sep 17 00:00:00 2001 From: Vince Prignano Date: Sat, 30 Nov 2024 07:09:43 -0800 Subject: [PATCH 2/3] :bug: Fix RenewDeadline typo in leader election Signed-off-by: Vince Prignano --- pkg/leaderelection/leader_election.go | 6 +++--- pkg/manager/manager.go | 2 +- pkg/manager/manager_test.go | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/leaderelection/leader_election.go b/pkg/leaderelection/leader_election.go index 5a41f5c747..69daf94e48 100644 --- a/pkg/leaderelection/leader_election.go +++ b/pkg/leaderelection/leader_election.go @@ -49,8 +49,8 @@ type Options struct { // will use for holding the leader lock. LeaderElectionID string - // RewnewDeadline is the renew deadline for this leader election client - RewnewDeadline time.Duration + // RenewDeadline is the renew deadline for this leader election client + RenewDeadline time.Duration } // NewResourceLock creates a new resource lock for use in a leader election loop. @@ -99,7 +99,7 @@ func NewResourceLock(config *rest.Config, recorderProvider recorder.Provider, op EventRecorder: recorderProvider.GetEventRecorderFor(id), }, config, - options.RewnewDeadline, + options.RenewDeadline, ) } diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 8e5d3bc8a7..92906fe6ca 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -389,7 +389,7 @@ func New(config *rest.Config, options Options) (Manager, error) { LeaderElectionResourceLock: options.LeaderElectionResourceLock, LeaderElectionID: options.LeaderElectionID, LeaderElectionNamespace: options.LeaderElectionNamespace, - RewnewDeadline: *options.RenewDeadline, + RenewDeadline: *options.RenewDeadline, }) if err != nil { return nil, err diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 792bc4f967..6e5353e345 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -324,8 +324,8 @@ var _ = Describe("manger.Manager", func() { LeaderElectionNamespace: "default", LeaderElectionID: "test-leader-election-id", newResourceLock: func(config *rest.Config, recorderProvider recorder.Provider, options leaderelection.Options) (resourcelock.Interface, error) { - if options.RewnewDeadline != 10*time.Second { - return nil, fmt.Errorf("expected RenewDeadline to be 10s, got %v", options.RewnewDeadline) + if options.RenewDeadline != 10*time.Second { + return nil, fmt.Errorf("expected RenewDeadline to be 10s, got %v", options.RenewDeadline) } var err error rl, err = leaderelection.NewResourceLock(config, recorderProvider, options) From 2a0ce596d97af3673d20ff8930a60dba89a20ca0 Mon Sep 17 00:00:00 2001 From: Alvaro Aleman Date: Sun, 1 Dec 2024 11:21:14 -0500 Subject: [PATCH 3/3] :seedling: Make using leader elector with client timeout non-breaking This change is a follow-up to the one that introduces the usage of the leader-elector with client timeout. That change was breaking because it introduces a new option and always assumed it was set. This change makes us only use that option if its actually set. --- pkg/leaderelection/leader_election.go | 35 ++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/pkg/leaderelection/leader_election.go b/pkg/leaderelection/leader_election.go index 69daf94e48..5cc253917a 100644 --- a/pkg/leaderelection/leader_election.go +++ b/pkg/leaderelection/leader_election.go @@ -23,6 +23,8 @@ import ( "time" "k8s.io/apimachinery/pkg/util/uuid" + coordinationv1client "k8s.io/client-go/kubernetes/typed/coordination/v1" + corev1client "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/rest" "k8s.io/client-go/tools/leaderelection/resourcelock" @@ -49,7 +51,10 @@ type Options struct { // will use for holding the leader lock. LeaderElectionID string - // RenewDeadline is the renew deadline for this leader election client + // RenewDeadline is the renew deadline for this leader election client. + // Must be set to ensure the resource lock has an appropriate client timeout. + // Without that, a single slow response from the API server can result + // in losing leadership. RenewDeadline time.Duration } @@ -91,15 +96,37 @@ func NewResourceLock(config *rest.Config, recorderProvider recorder.Provider, op // Construct clients for leader election rest.AddUserAgent(config, "leader-election") - return resourcelock.NewFromKubeconfig(options.LeaderElectionResourceLock, + if options.RenewDeadline != 0 { + return resourcelock.NewFromKubeconfig(options.LeaderElectionResourceLock, + options.LeaderElectionNamespace, + options.LeaderElectionID, + resourcelock.ResourceLockConfig{ + Identity: id, + EventRecorder: recorderProvider.GetEventRecorderFor(id), + }, + config, + options.RenewDeadline, + ) + } + + corev1Client, err := corev1client.NewForConfig(config) + if err != nil { + return nil, err + } + + coordinationClient, err := coordinationv1client.NewForConfig(config) + if err != nil { + return nil, err + } + return resourcelock.New(options.LeaderElectionResourceLock, options.LeaderElectionNamespace, options.LeaderElectionID, + corev1Client, + coordinationClient, resourcelock.ResourceLockConfig{ Identity: id, EventRecorder: recorderProvider.GetEventRecorderFor(id), }, - config, - options.RenewDeadline, ) }