From a84906bed084f8c28d70f64d155edab2e27ec9cc Mon Sep 17 00:00:00 2001 From: Giulio Frasca Date: Tue, 18 Apr 2023 02:21:06 -0400 Subject: [PATCH] Add CR Ready Status Metrics --- README.md | 12 +++++ controllers/dspipeline_controller.go | 39 +++++++++++++++ controllers/metrics.go | 74 ++++++++++++++++++++++++++++ go.mod | 2 +- main.go | 2 + 5 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 controllers/metrics.go diff --git a/README.md b/README.md index 745c3c27e9..7179dc68ab 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ Data Science Pipeline stacks onto individual OCP namespaces. 1. [Cleanup ODH Installation](#cleanup-odh-installation) 2. [Cleanup Standalone Installation](#cleanup-standalone-installation) 5. [Run tests](#run-tests) +6. [Metrics](#metrics) # Quickstart @@ -359,6 +360,17 @@ You can find a more permanent location to install `setup-envtest` into on your l `KUBEBUILDER_ASSETS` into your `.bashrc` or equivalent. By doing this you can always run `pre-commit run --all-files` without having to repeat these steps. +# Metrics + +The Data Science Pipelines Operator exposes standard operator-sdk metrics for controller monitoring purposes. +In addition to these metrics, DSPO also exposes several custom metrics for monitoring the status of the DataSciencePipelinesApplications that it owns. 
+ +They are as follows: +- `data_science_pipelines_application_apiserver_ready` - Gauge that indicates if the DSPA's APIServer is in a Ready state (1 => Ready, 0 => Not Ready) +- `data_science_pipelines_application_persistenceagent_ready` - Gauge that indicates if the DSPA's PersistenceAgent is in a Ready state (1 => Ready, 0 => Not Ready) +- `data_science_pipelines_application_scheduledworkflow_ready` - Gauge that indicates if the DSPA's ScheduledWorkflow manager is in a Ready state (1 => Ready, 0 => Not Ready) +- `data_science_pipelines_application_ready` - Gauge that indicates if the DSPA is in a fully Ready state (1 => Ready, 0 => Not Ready) + [cluster admin]: https://docs.openshift.com/container-platform/4.12/authentication/using-rbac.html#creating-cluster-admin_using-rbac [oc client]: https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/latest/openshift-client-linux.tar.gz [OCP Pipelines Operator]: https://docs.openshift.com/container-platform/4.12/cicd/pipelines/installing-pipelines.html#op-installing-pipelines-operator-in-web-console_installing-pipelines diff --git a/controllers/dspipeline_controller.go b/controllers/dspipeline_controller.go index 38cc9fd4f6..31d7028571 100644 --- a/controllers/dspipeline_controller.go +++ b/controllers/dspipeline_controller.go @@ -308,9 +308,48 @@ func (r *DSPAReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. 
return ctrl.Result{}, err } + r.PublishMetrics(dspa, apiServerReady, persistenceAgentReady, scheduledWorkflowReady, crReady) + return ctrl.Result{}, nil } +// PublishMetrics sets each readiness gauge, labelled with the DSPA's name and namespace, to 1 when the matching condition's Status is metav1.ConditionTrue and to 0 otherwise, logging each result. +func (r *DSPAReconciler) PublishMetrics(dspa *dspav1alpha1.DataSciencePipelinesApplication, + apiServerReady, persistenceAgentReady, scheduledWorkflowReady, crReady metav1.Condition) { + r.Log.Info("Publishing Ready Metrics") + if apiServerReady.Status == metav1.ConditionTrue { + r.Log.Info("APIServer Ready") + APIServerReadyMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(1) + } else { + r.Log.Info("APIServer Not Ready") + APIServerReadyMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(0) + } + + if persistenceAgentReady.Status == metav1.ConditionTrue { + r.Log.Info("PersistenceAgent Ready") + PersistenceAgentReadyMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(1) + } else { + r.Log.Info("PersistenceAgent Not Ready") + PersistenceAgentReadyMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(0) + } + + if scheduledWorkflowReady.Status == metav1.ConditionTrue { + r.Log.Info("ScheduledWorkflow Ready") + ScheduledWorkflowReadyMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(1) + } else { + r.Log.Info("ScheduledWorkflow Not Ready") + ScheduledWorkflowReadyMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(0) + } + + if crReady.Status == metav1.ConditionTrue { + r.Log.Info("CR Fully Ready") + CrReadyMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(1) + } else { + r.Log.Info("CR Not Ready") + CrReadyMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(0) + } +} + // SetupWithManager sets up the controller with the Manager. func (r *DSPAReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). diff --git a/controllers/metrics.go b/controllers/metrics.go new file mode 100644 index 0000000000..0b8c319fd9 --- /dev/null +++ b/controllers/metrics.go @@ -0,0 +1,74 @@ +/* +Copyright 2023. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "github.com/prometheus/client_golang/prometheus" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +// Readiness gauges for a DataSciencePipelinesApplication and its components, each labelled by dspa_name and namespace. +var ( + APIServerReadyMetric = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "data_science_pipelines_application_apiserver_ready", + Help: "Data Science Pipelines Application - APIServer Ready Status", + }, + []string{ + "dspa_name", + "namespace", + }, + ) + PersistenceAgentReadyMetric = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "data_science_pipelines_application_persistenceagent_ready", + Help: "Data Science Pipelines Application - PersistenceAgent Ready Status", + }, + []string{ + "dspa_name", + "namespace", + }, + ) + ScheduledWorkflowReadyMetric = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "data_science_pipelines_application_scheduledworkflow_ready", + Help: "Data Science Pipelines Application - ScheduledWorkflow Ready Status", + }, + []string{ + "dspa_name", + "namespace", + }, + ) + CrReadyMetric = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "data_science_pipelines_application_ready", + Help: "Data Science Pipelines Application - CustomResource Ready Status", + }, + []string{ + "dspa_name", + "namespace", + }, + ) +) + +// InitMetrics registers the four readiness gauges above with the controller-runtime metrics.Registry. +func InitMetrics() { + metrics.Registry.MustRegister(APIServerReadyMetric, + PersistenceAgentReadyMetric, + ScheduledWorkflowReadyMetric, 
+ CrReadyMetric) +} diff --git a/go.mod b/go.mod index 2717dc2a4b..99550c349c 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/onsi/ginkgo/v2 v2.8.4 github.com/onsi/gomega v1.27.1 github.com/openshift/api v3.9.0+incompatible + github.com/prometheus/client_golang v1.12.2 github.com/spf13/viper v1.4.0 go.uber.org/zap v1.21.0 k8s.io/api v0.25.0 @@ -63,7 +64,6 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pelletier/go-toml v1.2.0 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/client_golang v1.12.2 // indirect github.com/prometheus/client_model v0.2.0 // indirect github.com/prometheus/common v0.32.1 // indirect github.com/prometheus/procfs v0.7.3 // indirect diff --git a/main.go b/main.go index 44c8d83458..36c72c6940 100644 --- a/main.go +++ b/main.go @@ -58,6 +58,8 @@ func init() { utilruntime.Must(dspav1alpha1.AddToScheme(scheme)) //+kubebuilder:scaffold:scheme + + controllers.InitMetrics() } func initConfig(configPath string) error {