Skip to content

Commit

Permalink
Re-work how we wait for API/Ingress to update (#82)
Browse files Browse the repository at this point in the history
* Re-work how we wait for API/Ingress to update

* check for error

* move ResetRoutes out of verifyDomain
  • Loading branch information
loganmc10 authored Jul 17, 2023
1 parent 55338a5 commit e7bc442
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 62 deletions.
8 changes: 8 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,14 @@ rules:
- get
- list
- watch
- apiGroups:
- config.openshift.io
resources:
- dnses
verbs:
- get
- list
- watch
- apiGroups:
- config.openshift.io
resources:
Expand Down
106 changes: 88 additions & 18 deletions controllers/clusterrelocation_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ package controllers

import (
"context"
"crypto/tls"
"fmt"
"net"
"time"

rhsysenggithubiov1beta1 "github.com/RHsyseng/cluster-relocation-operator/api/v1beta1"
reconcileACM "github.com/RHsyseng/cluster-relocation-operator/internal/acm"
Expand All @@ -31,6 +33,7 @@ import (
reconcilePullSecret "github.com/RHsyseng/cluster-relocation-operator/internal/pullSecret"
registryCert "github.com/RHsyseng/cluster-relocation-operator/internal/registryCert"
reconcileSSH "github.com/RHsyseng/cluster-relocation-operator/internal/ssh"
"github.com/RHsyseng/cluster-relocation-operator/internal/util"
agentv1 "github.com/stolostron/klusterlet-addon-controller/pkg/apis/agent/v1"
clusterv1 "open-cluster-management.io/api/cluster/v1"
operatorapiv1 "open-cluster-management.io/api/operator/v1"
Expand Down Expand Up @@ -75,6 +78,7 @@ const relocationFinalizer = "relocationfinalizer"
//+kubebuilder:rbac:groups="",resources=secrets,verbs=watch;list
//+kubebuilder:rbac:groups="",resources=configmaps,verbs=watch;list
//+kubebuilder:rbac:groups=config.openshift.io,resources=clusterversions,verbs=get;watch;list
//+kubebuilder:rbac:groups=config.openshift.io,resources=dnses,verbs=get;watch;list
//+kubebuilder:rbac:groups=config.openshift.io,resources=imagedigestmirrorsets,verbs=watch;list
//+kubebuilder:rbac:groups=operators.coreos.com,resources=catalogsources,verbs=watch;list
//+kubebuilder:rbac:groups=machineconfiguration.openshift.io,resources=machineconfigs,verbs=watch;list
Expand Down Expand Up @@ -181,24 +185,6 @@ func (r *ClusterRelocationReconciler) Reconcile(ctx context.Context, req ctrl.Re
return ctrl.Result{}, err
}

// Applies a new certificate and domain alias to the API server
if err := reconcileAPI.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.APIReconciliationFailedReason, err.Error())
return ctrl.Result{}, err
}

// Applies a new certificate and domain alias to the Apps ingressesed
if err := reconcileIngress.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.IngressReconciliationFailedReason, err.Error())
return ctrl.Result{}, err
}

// Apply a new cluster-wide pull secret
if err := reconcilePullSecret.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.PullSecretReconciliationFailedReason, err.Error())
return ctrl.Result{}, err
}

// Applies a SSH key for the 'core' user
if err := reconcileSSH.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.SSHReconciliationFailedReason, err.Error())
Expand All @@ -217,12 +203,40 @@ func (r *ClusterRelocationReconciler) Reconcile(ctx context.Context, req ctrl.Re
return ctrl.Result{}, err
}

// Apply a new cluster-wide pull secret
if err := reconcilePullSecret.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.PullSecretReconciliationFailedReason, err.Error())
return ctrl.Result{}, err
}

// Applies new catalog sources
if err := reconcileCatalog.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.CatalogReconciliationFailedReason, err.Error())
return ctrl.Result{}, err
}

// Applies a new certificate and domain alias to the Ingress
if err := reconcileIngress.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.IngressReconciliationFailedReason, err.Error())
return ctrl.Result{}, err
}

// Applies a new certificate and domain alias to the API server
if err := reconcileAPI.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.APIReconciliationFailedReason, err.Error())
return ctrl.Result{}, err
}

if err := r.verifyDomain(ctx, relocation.Spec.Domain, logger); err != nil {
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.InProgressReconciliationFailedReason, err.Error())
return ctrl.Result{}, err
}

if err := reconcileIngress.ResetRoutes(ctx, r.Client, fmt.Sprintf("apps.%s", relocation.Spec.Domain), logger); err != nil {
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.InProgressReconciliationFailedReason, err.Error())
return ctrl.Result{}, err
}

// Registers to ACM
if err := reconcileACM.Reconcile(ctx, r.Client, r.Scheme, relocation, logger); err != nil {
r.setFailedStatus(relocation, rhsysenggithubiov1beta1.ACMReconciliationFailedReason, err.Error())
Expand Down Expand Up @@ -328,12 +342,68 @@ func (r *ClusterRelocationReconciler) finalizeRelocation(ctx context.Context, lo
if err := reconcileAPI.Cleanup(ctx, r.Client, logger); err != nil {
return err
}

clusterDNS := &configv1.DNS{}
if err := r.Client.Get(ctx, types.NamespacedName{Name: "cluster"}, clusterDNS); err != nil {
return err
}
if err := r.verifyDomain(ctx, clusterDNS.Spec.BaseDomain, logger); err != nil {
return err
}

if err := reconcileIngress.ResetRoutes(ctx, r.Client, fmt.Sprintf("apps.%s", clusterDNS.Spec.BaseDomain), logger); err != nil {
return err
}
}

logger.Info("Successfully finalized ClusterRelocation")
return nil
}

// verifyDomain waits until the ingress and kube-apiserver endpoints derived
// from domainName each present a TLS certificate whose Subject CommonName
// matches the expected name ("*.apps.<domain>" and "api.<domain>"
// respectively). Once an endpoint serves the expected certificate, it waits
// for the corresponding ClusterOperator via util.WaitForCO before moving on
// to the next endpoint.
//
// The endpoints are polled every 10 seconds. The function returns an error if
// an endpoint cannot be dialed, if util.WaitForCO fails, or if ctx is
// cancelled while waiting.
func (r *ClusterRelocationReconciler) verifyDomain(ctx context.Context, domainName string, logger logr.Logger) error {
	const (
		dialTimeout  = 30 * time.Second
		pollInterval = 10 * time.Second
	)

	endpoints := []struct {
		operator   string // ClusterOperator name passed to util.WaitForCO
		address    string // host:port to probe
		commonName string // certificate CommonName expected once updated
	}{
		{
			operator:   "ingress",
			address:    fmt.Sprintf("test.apps.%s:443", domainName),
			commonName: fmt.Sprintf("*.apps.%s", domainName),
		},
		{
			operator:   "kube-apiserver",
			address:    fmt.Sprintf("api.%s:6443", domainName),
			commonName: fmt.Sprintf("api.%s", domainName),
		},
	}

	// Certificate verification is skipped on purpose: the connection is only
	// used to read the CommonName of whatever certificate is being served,
	// no data is exchanged over it.
	dialer := &tls.Dialer{
		NetDialer: &net.Dialer{Timeout: dialTimeout},
		Config:    &tls.Config{InsecureSkipVerify: true},
	}

	for _, ep := range endpoints {
		for {
			// DialContext honours both ctx cancellation and the dial timeout,
			// so an unreachable endpoint cannot block the reconciler forever.
			conn, err := dialer.DialContext(ctx, "tcp", ep.address)
			if err != nil {
				return fmt.Errorf("connecting to %s: %w", ep.address, err)
			}
			tlsConn, ok := conn.(*tls.Conn)
			if !ok {
				_ = conn.Close()
				return fmt.Errorf("connecting to %s: unexpected connection type %T", ep.address, conn)
			}
			certs := tlsConn.ConnectionState().PeerCertificates
			// The connection was only opened to inspect the handshake result;
			// a failure to close it cleanly is not actionable.
			_ = tlsConn.Close()

			updated := false
			for _, cert := range certs {
				if cert.Subject.CommonName == ep.commonName {
					updated = true
					break
				}
			}

			if updated {
				// ensure that ClusterOperator has settled
				if err := util.WaitForCO(ctx, r.Client, logger, ep.operator); err != nil {
					return err
				}
				break
			}

			logger.Info(fmt.Sprintf("Waiting for %s to update", ep.operator))
			select {
			case <-ctx.Done():
				return ctx.Err()
			case <-time.After(pollInterval):
			}
		}
	}

	return nil
}

func (r *ClusterRelocationReconciler) installSchemes() error {
if err := configv1.Install(r.Scheme); err != nil { // Add config.openshift.io/v1 to the scheme
return err
Expand Down
9 changes: 0 additions & 9 deletions internal/api/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (

rhsysenggithubiov1beta1 "github.com/RHsyseng/cluster-relocation-operator/api/v1beta1"
secrets "github.com/RHsyseng/cluster-relocation-operator/internal/secrets"
"github.com/RHsyseng/cluster-relocation-operator/internal/util"
"github.com/go-logr/logr"

configv1 "github.com/openshift/api/config/v1"
Expand Down Expand Up @@ -117,9 +116,6 @@ func Reconcile(ctx context.Context, c client.Client, scheme *runtime.Scheme, rel
return err
}
if op != controllerutil.OperationResultNone {
if err := util.WaitForCO(ctx, c, logger, "kube-apiserver", true); err != nil {
return err
}
logger.Info("APIServer modified", "OperationResult", op)
}
return nil
Expand All @@ -137,11 +133,6 @@ func Cleanup(ctx context.Context, c client.Client, logger logr.Logger) error {
return err
}
if op != controllerutil.OperationResultNone {
// if we let the finalizer finish before the API server has updated, it will delete a MachineConfig and cause a reboot
// if the node reboots before the API server has updated, it can cause the API server to lock up on the next boot
if err := util.WaitForCO(ctx, c, logger, "kube-apiserver", true); err != nil {
return err
}
logger.Info("APIServer reverted to original state", "OperationResult", op)
}
return nil
Expand Down
34 changes: 7 additions & 27 deletions internal/ingress/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,6 @@ func Reconcile(ctx context.Context, c client.Client, scheme *runtime.Scheme, rel
}

if op != controllerutil.OperationResultNone {
if err := util.WaitForCO(ctx, c, logger, "ingress", true); err != nil {
return err
}
logger.Info("IngressController modified", "OperationResult", op)
}

Expand Down Expand Up @@ -186,16 +183,9 @@ func Reconcile(ctx context.Context, c client.Client, scheme *runtime.Scheme, rel
}

if op != controllerutil.OperationResultNone {
if err := util.WaitForCO(ctx, c, logger, "openshift-apiserver", true); err != nil {
return err
}
logger.Info("Ingress domain aliases modified", "OperationResult", op)
}

if err := resetRoutes(ctx, c, fmt.Sprintf("apps.%s", relocation.Spec.Domain), logger); err != nil {
return err
}

return nil
}

Expand All @@ -213,9 +203,6 @@ func Cleanup(ctx context.Context, c client.Client, logger logr.Logger) error {
return err
}
if op != controllerutil.OperationResultNone {
if err := util.WaitForCO(ctx, c, logger, "ingress", true); err != nil {
return err
}
logger.Info("Ingress Controller reverted to original state", "OperationResult", op)
}
ingress := &configv1.Ingress{ObjectMeta: metav1.ObjectMeta{Name: "cluster"}}
Expand All @@ -228,34 +215,27 @@ func Cleanup(ctx context.Context, c client.Client, logger logr.Logger) error {
return err
}
if op != controllerutil.OperationResultNone {
// let the openshift-apiserver operator settle before deleting the Routes
// this ensures that the Routes get the proper domain when they are re-created
if err := util.WaitForCO(ctx, c, logger, "openshift-apiserver", true); err != nil {
return err
}
logger.Info("Cluster Ingress reverted to original state", "OperationResult", op)

}

if err := resetRoutes(ctx, c, ingress.Spec.Domain, logger); err != nil { // reset routes to their original domain if needed
return err
}

return nil
}

func resetRoutes(ctx context.Context, c client.Client, domainName string, logger logr.Logger) error {
if err := util.WaitForCO(ctx, c, logger, "openshift-apiserver", false); err != nil {
func ResetRoutes(ctx context.Context, c client.Client, domainName string, logger logr.Logger) error {
routes := &routev1.RouteList{}
if err := c.List(ctx, routes); err != nil {
return err
}

routes := &routev1.RouteList{}
if err := c.List(ctx, routes); err != nil {
if err := util.WaitForCO(ctx, c, logger, "openshift-apiserver"); err != nil {
return err
}

for _, v := range routes.Items {
if v.Namespace == "openshift-console" || v.Namespace == "openshift-authentication" || v.Namespace == "open-cluster-management-agent-addon" {
// open-cluster-management-agent-addon is ignored because right now the Klusterlet Add-on ignores the "appsDomain" setting
// A PR has been opened to correct this: https://github.com/stolostron/multicloud-operators-foundation/pull/642
// without this fix, the Route created by the Klusterlet is always re-created with the original domain
continue
}
for _, w := range v.Status.Ingress {
Expand Down
9 changes: 1 addition & 8 deletions internal/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,7 @@ import (
//+kubebuilder:rbac:groups=config.openshift.io,resources=clusteroperators,verbs=get;list;watch

// Waits for the operator to update before returning
func WaitForCO(ctx context.Context, c client.Client, logger logr.Logger, operator string, waitProgressingTrue bool) error {
if waitProgressingTrue {
logger.Info(fmt.Sprintf("Waiting for %s Progressing to be %s", operator, configv1.ConditionTrue))
if err := waitStatus(ctx, c, logger, operator, configv1.OperatorProgressing, configv1.ConditionTrue); err != nil {
return err
}
}

func WaitForCO(ctx context.Context, c client.Client, logger logr.Logger, operator string) error {
logger.Info(fmt.Sprintf("Waiting for %s Progressing to be %s", operator, configv1.ConditionFalse))
if err := waitStatus(ctx, c, logger, operator, configv1.OperatorProgressing, configv1.ConditionFalse); err != nil {
return err
Expand Down

0 comments on commit e7bc442

Please sign in to comment.