From b3b8aa0a0040f07c8966e0684948acbe5a81d516 Mon Sep 17 00:00:00 2001 From: Patryk Strusiewicz-Surmacki Date: Wed, 16 Oct 2024 12:46:05 +0200 Subject: [PATCH] Changed FRR configuration behavior Signed-off-by: Patryk Strusiewicz-Surmacki --- pkg/frr/configure.go | 14 ++++-- pkg/reconciler/layer3.go | 90 ++++++++++++++++++++---------------- pkg/reconciler/reconciler.go | 2 - 3 files changed, 62 insertions(+), 44 deletions(-) diff --git a/pkg/frr/configure.go b/pkg/frr/configure.go index d5d8782..7b0e175 100644 --- a/pkg/frr/configure.go +++ b/pkg/frr/configure.go @@ -43,7 +43,7 @@ func (m *Manager) Configure(in Configuration, nm *nl.Manager, nwopCfg *config.Co for j := range in.VRFs[i].Import { for k := range in.VRFs[i].Import[j].Items { if in.VRFs[i].Import[j].Items[k].Action != "deny" { - return false, fmt.Errorf("only deny rules are allowed in import prefix-lists of mgmt VRFs") + return false, &ConfigurationError{fmt.Errorf("only deny rules are allowed in import prefix-lists of mgmt VRFs")} } // Swap deny to permit, this will be a prefix-list called from a deny route-map in.VRFs[i].Import[j].Items[k].Action = "permit" @@ -58,7 +58,7 @@ func (m *Manager) Configure(in Configuration, nm *nl.Manager, nwopCfg *config.Co currentConfig, err := os.ReadFile(m.ConfigPath) if err != nil { - return false, fmt.Errorf("error reading configuration file: %w", err) + return false, &ConfigurationError{fmt.Errorf("error reading configuration file: %w", err)} } targetConfig, err := render(m.configTemplate, frrConfig) @@ -72,7 +72,7 @@ func (m *Manager) Configure(in Configuration, nm *nl.Manager, nwopCfg *config.Co if !bytes.Equal(currentConfig, targetConfig) { err = os.WriteFile(m.ConfigPath, targetConfig, frrPermissions) if err != nil { - return false, fmt.Errorf("error writing configuration file: %w", err) + return false, &ConfigurationError{fmt.Errorf("error writing configuration file: %w", err)} } return true, nil @@ -187,3 +187,11 @@ func applyCfgReplacements(frrConfig []byte, replacements []config.Replacement) [ } return frrConfig } + +type ConfigurationError struct { + err error +} + +func (r *ConfigurationError) Error() string { + return fmt.Sprintf("FRR configuration error: %s", r.err.Error()) +} diff --git a/pkg/reconciler/layer3.go b/pkg/reconciler/layer3.go index f66e91d..ecad4bf 100644 --- a/pkg/reconciler/layer3.go +++ b/pkg/reconciler/layer3.go @@ -2,6 +2,7 @@ package reconciler import ( "context" + "errors" "fmt" "net" "sort" @@ -65,6 +66,15 @@ func (r *reconcile) reconcileLayer3(l3vnis []networkv1alpha1.VRFRouteConfigurati return allConfigs[i].VNI < allConfigs[j].VNI }) + // Create FRR configuration and reload it + err = r.configureFRR(allConfigs) + if err != nil { + if !errors.Is(err, &frr.ConfigurationError{}) { + return err + } + r.Logger.Error(err, "failed to configure FRR") + } + created, deletedVRF, err := r.reconcileL3Netlink(l3Configs) if err != nil { r.Logger.Error(err, "error reconciling Netlink") @@ -75,54 +85,40 @@ func (r *reconcile) reconcileLayer3(l3vnis []networkv1alpha1.VRFRouteConfigurati if err != nil { return err } - reloadTwice := deletedVRF || deletedTaas - // We wait here for two seconds to let FRR settle after updating netlink devices - time.Sleep(defaultSleep) - - err = r.configureFRR(allConfigs, reloadTwice) - if err != nil { - return err + // When a BGP VRF is deleted there is a leftover running configuration after reload + // A second reload fixes this. + if deletedVRF || deletedTaas { + if err := r.reloadFRR(); err != nil { + return fmt.Errorf("failed to reload FRR: %w", err) + } } - // Make sure that all created netlink VRFs are up after FRR reload + // We wait here for two seconds to let FRR settle after updating netlink devices time.Sleep(defaultSleep) + for _, info := range created { if err := r.netlinkManager.UpL3(info); err != nil { - r.Logger.Error(err, "error setting L3 to state UP") - return fmt.Errorf("error setting L3 to state UP: %w", err) + r.Logger.Error(err, "error setting L3 to state UP", "interface", info) } } return nil } -func (r *reconcile) configureFRR(vrfConfigs []frr.VRFConfiguration, reloadTwice bool) error { +func (r *reconcile) configureFRR(vrfConfigs []frr.VRFConfiguration) error { changed, err := r.frrManager.Configure(frr.Configuration{ VRFs: vrfConfigs, ASN: r.config.ServerASN, }, r.netlinkManager, r.config) if err != nil { - r.Logger.Error(err, "error updating FRR configuration") return fmt.Errorf("error updating FRR configuration: %w", err) } - if changed || r.dirtyFRRConfig { + if changed { err := r.reloadFRR() if err != nil { - r.dirtyFRRConfig = true return err } - - // When a BGP VRF is deleted there is a leftover running configuration after reload - // A second reload fixes this. - if reloadTwice { - err := r.reloadFRR() - if err != nil { - r.dirtyFRRConfig = true - return err - } - } - r.dirtyFRRConfig = false } return nil } @@ -249,20 +245,12 @@ func (r *reconcile) reconcileL3Netlink(vrfConfigs []frr.VRFConfiguration) ([]nl. } // Check for VRFs that are configured on the host but no longer in Kubernetes - toDelete := []nl.VRFInformation{} - for i := range existing { - stillExists := false - for j := range vrfConfigs { - if vrfConfigs[j].Name == existing[i].Name && vrfConfigs[j].VNI == existing[i].VNI { - stillExists = true - existing[i].MTU = vrfConfigs[j].MTU - break - } - } - if !stillExists || existing[i].MarkForDelete { - toDelete = append(toDelete, existing[i]) - } else if err := r.reconcileExisting(existing[i]); err != nil { - r.Logger.Error(err, "error reconciling existing VRF", "vrf", existing[i].Name, "vni", strconv.Itoa(existing[i].VNI)) + preexisting, toDelete := r.gatherInterfacesInfo(vrfConfigs, existing) + + // Make sure that all previously configured L3 interfaces are up + for _, info := range preexisting { + if err := r.netlinkManager.UpL3(info); err != nil { + r.Logger.Error(err, "failed to set L3 up", "interface", info.Name) } } @@ -289,6 +277,30 @@ func (r *reconcile) reconcileL3Netlink(vrfConfigs []frr.VRFConfiguration) ([]nl. return toCreate, len(toDelete) > 0, nil } +func (r *reconcile) gatherInterfacesInfo(vrfConfigs []frr.VRFConfiguration, existing []nl.VRFInformation) (preexisting, toDelete []nl.VRFInformation) { + // Check for VRFs that are configured on the host but no longer in Kubernetes + for i := range existing { + stillExists := false + for j := range vrfConfigs { + if vrfConfigs[j].Name == existing[i].Name && vrfConfigs[j].VNI == existing[i].VNI { + stillExists = true + existing[i].MTU = vrfConfigs[j].MTU + if !existing[i].MarkForDelete { + preexisting = append(preexisting, existing[i]) + } + break + } + } + if !stillExists || existing[i].MarkForDelete { + toDelete = append(toDelete, existing[i]) + } else if err := r.reconcileExisting(existing[i]); err != nil { + r.Logger.Error(err, "error reconciling existing VRF", "vrf", existing[i].Name, "vni", strconv.Itoa(existing[i].VNI)) + } + } + + return preexisting, toDelete +} + func (r *reconcile) reconcileTaasNetlink(vrfConfigs []frr.VRFConfiguration) (bool, error) { existing, err := r.netlinkManager.ListTaas() if err != nil { diff --git a/pkg/reconciler/reconciler.go b/pkg/reconciler/reconciler.go index 9f7dd94..2c430f0 100644 --- a/pkg/reconciler/reconciler.go +++ b/pkg/reconciler/reconciler.go @@ -28,8 +28,6 @@ type Reconciler struct { healthChecker *healthcheck.HealthChecker debouncer *debounce.Debouncer - - dirtyFRRConfig bool } type reconcile struct {