From 4df0c482356f338c0d2bb7e806313f869a853fcf Mon Sep 17 00:00:00 2001 From: pmahindrakar-oss Date: Fri, 10 Dec 2021 22:29:44 +0530 Subject: [PATCH] Removed retries during precheck and falling back on k8s retries (#306) * Removed retries during precheck and falling back on k8s retries Signed-off-by: Prafulla Mahindrakar * Linter fixes Signed-off-by: Prafulla Mahindrakar --- .../cmd/scheduler/entrypoints/precheck.go | 45 ++++++------------- .../runtime/application_config_provider.go | 6 --- .../interfaces/application_configuration.go | 7 --- 3 files changed, 13 insertions(+), 45 deletions(-) diff --git a/flyteadmin/cmd/scheduler/entrypoints/precheck.go b/flyteadmin/cmd/scheduler/entrypoints/precheck.go index 9593b93c91..ee25bc48c3 100644 --- a/flyteadmin/cmd/scheduler/entrypoints/precheck.go +++ b/flyteadmin/cmd/scheduler/entrypoints/precheck.go @@ -4,18 +4,16 @@ import ( "context" "fmt" - "github.com/flyteorg/flyteadmin/pkg/runtime" "github.com/flyteorg/flyteidl/clients/go/admin" "github.com/flyteorg/flytestdlib/logger" "github.com/spf13/cobra" "google.golang.org/grpc/health/grpc_health_v1" - "k8s.io/client-go/util/retry" ) const ( healthCheckSuccess = "Health check passed, Flyteadmin is up and running" - healthCheckError = "Health check failed with status %v" + healthCheckError = "health check failed with status %v" ) var preCheckRunCmd = &cobra.Command{ @@ -24,40 +22,23 @@ var preCheckRunCmd = &cobra.Command{ RunE: func(cmd *cobra.Command, args []string) error { ctx := context.Background() - appConfig := runtime.NewApplicationConfigurationProvider() - opts := appConfig.GetSchedulerConfig().GetPrecheckBackoff() - - err := retry.OnError(opts, - func(err error) bool { - logger.Errorf(ctx, "Attempt failed due to %v", err) - return err != nil - }, - func() error { - clientSet, err := admin.ClientSetBuilder().WithConfig(admin.GetConfig(ctx)).Build(ctx) - - if err != nil { - logger.Errorf(ctx, "Flyte native scheduler precheck failed due to %v\n", err) - return err - } - - healthCheckResponse, err := clientSet.HealthServiceClient().Check(ctx, - &grpc_health_v1.HealthCheckRequest{Service: "flyteadmin"}) - if err != nil { - return err - } - if healthCheckResponse.GetStatus() != grpc_health_v1.HealthCheckResponse_SERVING { - logger.Errorf(ctx, healthCheckError, healthCheckResponse.GetStatus()) - return fmt.Errorf(healthCheckError, healthCheckResponse.GetStatus()) - } - logger.Infof(ctx, "Health check response is %v", healthCheckResponse) - return nil - }, - ) + clientSet, err := admin.ClientSetBuilder().WithConfig(admin.GetConfig(ctx)).Build(ctx) if err != nil { + logger.Errorf(ctx, "Flyte native scheduler precheck failed due to %v\n", err) return err } + healthCheckResponse, err := clientSet.HealthServiceClient().Check(ctx, + &grpc_health_v1.HealthCheckRequest{Service: "flyteadmin"}) + if err != nil { + return err + } + if healthCheckResponse.GetStatus() != grpc_health_v1.HealthCheckResponse_SERVING { + logger.Errorf(ctx, healthCheckError, healthCheckResponse.GetStatus()) + return fmt.Errorf(healthCheckError, healthCheckResponse.GetStatus()) + } + logger.Infof(ctx, "Health check response is %v", healthCheckResponse) logger.Infof(ctx, healthCheckSuccess) return nil }, diff --git a/flyteadmin/pkg/runtime/application_config_provider.go b/flyteadmin/pkg/runtime/application_config_provider.go index 2c6687314b..3ea762aae9 100644 --- a/flyteadmin/pkg/runtime/application_config_provider.go +++ b/flyteadmin/pkg/runtime/application_config_provider.go @@ -5,14 +5,11 @@ import ( "io/ioutil" "os" "strings" - "time" "github.com/flyteorg/flyteadmin/pkg/common" "github.com/flyteorg/flyteadmin/pkg/runtime/interfaces" "github.com/flyteorg/flytestdlib/config" "github.com/flyteorg/flytestdlib/logger" - - "k8s.io/apimachinery/pkg/util/wait" ) const database = "database" @@ -59,9 +56,6 @@ var schedulerConfig = config.MustRegisterSection(scheduler, &interfaces.Schedule }, }, }, - PrecheckBackoff: wait.Backoff{ - Duration: time.Second, Factor: 2.0, Steps: 30, Jitter: 0.1, - }, }) var remoteDataConfig = config.MustRegisterSection(remoteData, &interfaces.RemoteDataConfig{ Scheme: common.None, diff --git a/flyteadmin/pkg/runtime/interfaces/application_configuration.go b/flyteadmin/pkg/runtime/interfaces/application_configuration.go index d2b4dcc491..76f76642f0 100644 --- a/flyteadmin/pkg/runtime/interfaces/application_configuration.go +++ b/flyteadmin/pkg/runtime/interfaces/application_configuration.go @@ -3,7 +3,6 @@ package interfaces import ( "github.com/flyteorg/flytestdlib/config" "golang.org/x/time/rate" - "k8s.io/apimachinery/pkg/util/wait" ) // This configuration section is used to for initiating the database connection with the store that holds registered @@ -276,8 +275,6 @@ type SchedulerConfig struct { ReconnectAttempts int `json:"reconnectAttempts"` // Specifies the time interval to wait before attempting to reconnect the workflow executor client. ReconnectDelaySeconds int `json:"reconnectDelaySeconds"` - // Specifies the backoff settings when scheduler checks for the flyteadmin health during startup. - PrecheckBackoff wait.Backoff `json:"backoff"` } func (s *SchedulerConfig) GetEventSchedulerConfig() EventSchedulerConfig { @@ -296,10 +293,6 @@ func (s *SchedulerConfig) GetReconnectDelaySeconds() int { return s.ReconnectDelaySeconds } -func (s *SchedulerConfig) GetPrecheckBackoff() wait.Backoff { - return s.PrecheckBackoff -} - // Configuration specific to setting up signed urls. type SignedURL struct { // Whether signed urls should even be returned with GetExecutionData, GetNodeExecutionData and GetTaskExecutionData