Skip to content

Commit

Permalink
UPSTREAM: <carry>: skip posting failures to aggregated APIs to avoid …
Browse files Browse the repository at this point in the history
…getting false positives until the server becomes ready

the availability checks depend on fully initialized SDN
OpenShift carries a few reachability checks that affect /readyz protocol
we skip posting failures to avoid getting false positives until the server becomes ready

UPSTREAM: <carry>: skip posting failures to aggregated APIs to avoid getting false positives until the server becomes ready

marks availability of the server before checking the aggregate APIs
as it can change as we are running the checks.
in that case, skip posting failures to avoid false positives.

note on the next rebase please squash with the previous commit

UPSTREAM: <carry>: expose HasBeenReady lifecycle signal

OpenShift-Rebase-Source: 8558e88
  • Loading branch information
p0lyn0mial authored and bertinatto committed Dec 11, 2024
1 parent a1beeca commit 55d5ef8
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 1 deletion.
5 changes: 5 additions & 0 deletions staging/src/k8s.io/apiserver/pkg/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,11 @@ func (c *Config) ShutdownInitiatedNotify() <-chan struct{} {
return c.lifecycleSignals.ShutdownInitiated.Signaled()
}

// HasBeenReadySignal exposes a server's lifecycle signal which is signaled when the readyz endpoint succeeds for the first time.
func (c *Config) HasBeenReadySignal() <-chan struct{} {
return c.lifecycleSignals.HasBeenReady.Signaled()
}

// Complete fills in any fields not set that are required to have valid data and can be derived
// from other fields. If you're going to `ApplyOptions`, do that first. It's mutating the receiver.
func (c *Config) Complete(informers informers.SharedInformerFactory) CompletedConfig {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ func (c completedConfig) NewWithDelegate(delegationTarget genericapiserver.Deleg
(func() ([]byte, []byte))(s.proxyCurrentCertKeyContent),
s.serviceResolver,
metrics,
c.GenericConfig.HasBeenReadySignal(),
)
if err != nil {
return nil, err
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ type AvailableConditionController struct {

// metrics registered into legacy registry
metrics *availabilitymetrics.Metrics

// hasBeenReady is signaled when the readyz endpoint succeeds for the first time.
hasBeenReady <-chan struct{}
}

// New returns a new remote APIService AvailableConditionController.
Expand All @@ -98,6 +101,7 @@ func New(
proxyCurrentCertKeyContent certKeyFunc,
serviceResolver ServiceResolver,
metrics *availabilitymetrics.Metrics,
hasBeenReady <-chan struct{},
) (*AvailableConditionController, error) {
c := &AvailableConditionController{
apiServiceClient: apiServiceClient,
Expand All @@ -115,6 +119,7 @@ func New(
proxyTransportDial: proxyTransportDial,
proxyCurrentCertKeyContent: proxyCurrentCertKeyContent,
metrics: metrics,
hasBeenReady: hasBeenReady,
}

// resync on this one because it is low cardinality and rechecking the actual discovery
Expand Down Expand Up @@ -164,6 +169,18 @@ func (c *AvailableConditionController) sync(key string) error {
return nil
}

// the availability checks depend on fully initialized SDN
// OpenShift carries a few reachability checks that affect /readyz protocol
// record availability of the server so that we can
// skip posting failures to avoid getting false positives until the server becomes ready
hasBeenReady := false
select {
case <-c.hasBeenReady:
hasBeenReady = true
default:
// continue, we will skip posting only potential failures
}

apiService := originalAPIService.DeepCopy()

// if a particular transport was specified, use that otherwise build one
Expand Down Expand Up @@ -347,6 +364,11 @@ func (c *AvailableConditionController) sync(key string) error {
}

if lastError != nil {
if !hasBeenReady {
// returning an error will requeue the item in an exponential fashion
return fmt.Errorf("the server hasn't been ready yet, skipping updating availability of the aggreaged API until the server becomes ready to avoid false positives, lastError = %v", lastError)
}

availableCondition.Status = apiregistrationv1.ConditionFalse
availableCondition.Reason = "FailedDiscoveryCheck"
availableCondition.Message = lastError.Error()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ func setupAPIServices(t T, apiServices []runtime.Object) (*AvailableConditionCon
}
}

alwaysReadyChan := make(chan struct{})
close(alwaysReadyChan)

c := AvailableConditionController{
apiServiceClient: fakeClient.ApiregistrationV1(),
apiServiceLister: listers.NewAPIServiceLister(apiServiceIndexer),
Expand All @@ -141,7 +144,8 @@ func setupAPIServices(t T, apiServices []runtime.Object) (*AvailableConditionCon
workqueue.NewTypedItemExponentialFailureRateLimiter[string](5*time.Millisecond, 30*time.Second),
workqueue.TypedRateLimitingQueueConfig[string]{Name: "AvailableConditionController"},
),
metrics: availabilitymetrics.New(),
metrics: availabilitymetrics.New(),
hasBeenReady: alwaysReadyChan,
}
for _, svc := range apiServices {
c.addAPIService(svc)
Expand Down Expand Up @@ -401,6 +405,8 @@ func TestSync(t *testing.T) {
w.WriteHeader(tc.backendStatus)
}))
defer testServer.Close()
alwaysReadyChan := make(chan struct{})
close(alwaysReadyChan)

c := AvailableConditionController{
apiServiceClient: fakeClient.ApiregistrationV1(),
Expand All @@ -410,6 +416,7 @@ func TestSync(t *testing.T) {
serviceResolver: &fakeServiceResolver{url: testServer.URL},
proxyCurrentCertKeyContent: func() ([]byte, []byte) { return emptyCert(), emptyCert() },
metrics: availabilitymetrics.New(),
hasBeenReady: alwaysReadyChan,
}
err := c.sync(tc.apiServiceName)
if tc.expectedSyncError != "" {
Expand Down

0 comments on commit 55d5ef8

Please sign in to comment.