From bf3051eebbbb34a6c5eb266225cf2ec71fd3fdb1 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Thu, 8 Jan 2026 12:27:47 +0100 Subject: [PATCH] EvictionController: Do not disable compute-service anymore The HypervisorMaintenanceController already disabled the compute-service, so drop doing that in the eviction controller. We just wait for the condition to be reached. --- internal/controller/eviction_controller.go | 216 ++---------------- .../controller/eviction_controller_test.go | 130 +++++------ 2 files changed, 76 insertions(+), 270 deletions(-) diff --git a/internal/controller/eviction_controller.go b/internal/controller/eviction_controller.go index 28d669d3..b8fc678b 100644 --- a/internal/controller/eviction_controller.go +++ b/internal/controller/eviction_controller.go @@ -28,14 +28,12 @@ import ( "github.com/gophercloud/gophercloud/v2" "github.com/gophercloud/gophercloud/v2/openstack/compute/v2/hypervisors" "github.com/gophercloud/gophercloud/v2/openstack/compute/v2/servers" - "github.com/gophercloud/gophercloud/v2/openstack/compute/v2/services" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" logger "sigs.k8s.io/controller-runtime/pkg/log" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" @@ -50,7 +48,6 @@ type EvictionReconciler struct { } const ( - evictionFinalizerName = "eviction-controller.cloud.sap/finalizer" EvictionControllerName = "eviction" shortRetryTime = 1 * time.Second defaultPollTime = 10 * time.Second @@ -84,15 +81,7 @@ func (r *EvictionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c // Being deleted if !eviction.DeletionTimestamp.IsZero() { - err := r.handleFinalizer(ctx, eviction, hv) - if err != nil { - if errors.Is(err, ErrRetry) { - return ctrl.Result{RequeueAfter: defaultWaitTime}, nil - } - return ctrl.Result{}, err - } - log.Info("deleted") - return ctrl.Result{}, err + return ctrl.Result{}, nil } statusCondition := meta.FindStatusCondition(eviction.Status.Conditions, kvmv1.ConditionTypeEvicting) @@ -157,9 +146,25 @@ func (r *EvictionReconciler) updateStatus(ctx context.Context, eviction, base *k func (r *EvictionReconciler) handlePreflight(ctx context.Context, eviction *kvmv1.Eviction, hv *kvmv1.Hypervisor) (ctrl.Result, error) { base := eviction.DeepCopy() - expectHypervisor := HasStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) - // Does the hypervisor even exist? Is it enabled/disabled? - hypervisor, err := hypervisors.Get(ctx, r.computeClient, hv.Status.HypervisorID).Extract() + expectHypervisor := hv.Status.HypervisorID != "" && hv.Status.ServiceID != "" // The hypervisor has been registered + + // If the hypervisor should exist, then we need to ensure it is disabled before we start evicting + if expectHypervisor && !meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) { + // Hypervisor is not disabled (yet?), reflect that as a failing preflight check + if meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ + Type: kvmv1.ConditionTypePreflight, + Status: metav1.ConditionFalse, + Message: "hypervisor not disabled", + Reason: kvmv1.ConditionReasonFailed, + }) { + return ctrl.Result{}, r.updateStatus(ctx, eviction, base) + } + return ctrl.Result{RequeueAfter: defaultPollTime}, nil // Wait for hypervisor to be disabled + } + + // Fetch all virtual machines on the hypervisor + trueVal := true + hypervisor, err := hypervisors.GetExt(ctx, r.computeClient, hv.Status.HypervisorID, hypervisors.GetOpts{WithServers: &trueVal}).Extract() if err != nil { if !gophercloud.ResponseCodeIs(err, http.StatusNotFound) { return ctrl.Result{}, err @@ -190,18 +195,6 @@ func (r *EvictionReconciler) handlePreflight(ctx context.Context, eviction *kvmv } } - if !meta.IsStatusConditionTrue(eviction.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) { - // Hypervisor is not disabled/ensured, so we need to disable it - return ctrl.Result{}, r.disableHypervisor(ctx, hypervisor, eviction) - } - - // Fetch all virtual machines on the hypervisor - trueVal := true - hypervisor, err = hypervisors.GetExt(ctx, r.computeClient, hv.Status.HypervisorID, hypervisors.GetOpts{WithServers: &trueVal}).Extract() - if err != nil { - return ctrl.Result{}, err - } - if hypervisor.Servers != nil { uuids := make([]string, len(*hypervisor.Servers)) for i, server := range *hypervisor.Servers { @@ -281,11 +274,9 @@ func (r *EvictionReconciler) evictNext(ctx context.Context, eviction *kvmv1.Evic Message: fmt.Sprintf("Migration of instance %s failed: %s", vm.ID, vm.Fault.Message), Reason: kvmv1.ConditionReasonFailed, }) - if err := r.updateStatus(ctx, eviction, base); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, fmt.Errorf("error migrating instance %v", uuid) + return ctrl.Result{}, errors.Join(fmt.Errorf("error migrating instance %v", uuid), + r.updateStatus(ctx, eviction, base)) } currentHypervisor, _, _ := strings.Cut(vm.HypervisorHostname, ".") @@ -358,169 +349,6 @@ func (r *EvictionReconciler) evictNext(ctx context.Context, eviction *kvmv1.Evic return ctrl.Result{RequeueAfter: defaultPollTime}, err } -func (r *EvictionReconciler) evictionReason(eviction *kvmv1.Eviction) string { - return fmt.Sprintf("Eviction %v/%v: %v", eviction.Namespace, eviction.Name, eviction.Spec.Reason) -} - -func (r *EvictionReconciler) handleFinalizer(ctx context.Context, eviction *kvmv1.Eviction, hypervisor *kvmv1.Hypervisor) error { - if !controllerutil.ContainsFinalizer(eviction, evictionFinalizerName) { - return nil - } - - // As long as we didn't succeed to re-enable the hypervisor, which includes - // - the hypervisor being gone, because it has been torn down - // - the hypervisor having been enabled by someone else - if !meta.IsStatusConditionTrue(eviction.Status.Conditions, kvmv1.ConditionTypeHypervisorReEnabled) { - err := r.enableHypervisorService(ctx, eviction, hypervisor) - if err != nil { - return err - } - } - - base := eviction.DeepCopy() - controllerutil.RemoveFinalizer(eviction, evictionFinalizerName) - return r.Patch(ctx, eviction, client.MergeFromWithOptions(base, client.MergeFromWithOptimisticLock{})) -} - -func (r *EvictionReconciler) enableHypervisorService(ctx context.Context, eviction *kvmv1.Eviction, hv *kvmv1.Hypervisor) error { - log := logger.FromContext(ctx) - - hypervisor, err := hypervisors.Get(ctx, r.computeClient, hv.Status.HypervisorID).Extract() - if err != nil { - if errors.Is(err, openstack.ErrNoHypervisor) { - base := eviction.DeepCopy() - changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeHypervisorReEnabled, - Status: metav1.ConditionTrue, - Message: "Hypervisor is gone, no need to re-enable", - Reason: kvmv1.ConditionReasonSucceeded, - }) - if changed { - return r.updateStatus(ctx, eviction, base) - } else { - return nil - } - } else { - base := eviction.DeepCopy() - errorMessage := fmt.Sprintf("failed to get hypervisor due to %s", err) - // update the condition to reflect the error, and retry the reconciliation - changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeHypervisorReEnabled, - Status: metav1.ConditionFalse, - Message: errorMessage, - Reason: kvmv1.ConditionReasonFailed, - }) - - if changed { - if err2 := r.updateStatus(ctx, eviction, base); err2 != nil { - log.Error(err, "failed to store error message in condition", "message", errorMessage) - return err2 - } - } - - return ErrRetry - } - } - - if hypervisor.Service.DisabledReason != r.evictionReason(eviction) { - base := eviction.DeepCopy() - changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeHypervisorReEnabled, - Status: metav1.ConditionTrue, - Message: "Hypervisor already re-enabled for reason:" + hypervisor.Service.DisabledReason, - Reason: kvmv1.ConditionReasonSucceeded, - }) - if changed { - return r.updateStatus(ctx, eviction, base) - } else { - return nil - } - } - - enableService := services.UpdateOpts{Status: services.ServiceEnabled} - log.Info("Enabling hypervisor", "id", hypervisor.Service.ID) - _, err = services.Update(ctx, r.computeClient, hypervisor.Service.ID, enableService).Extract() - - if err != nil { - base := eviction.DeepCopy() - errorMessage := fmt.Sprintf("failed to enable hypervisor due to %s", err) - changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeHypervisorReEnabled, - Status: metav1.ConditionFalse, - Message: errorMessage, - Reason: kvmv1.ConditionReasonFailed, - }) - if changed { - if err2 := r.updateStatus(ctx, eviction, base); err2 != nil { - log.Error(err, "failed to store error message in condition", "message", errorMessage) - } - } - return ErrRetry - } else { - base := eviction.DeepCopy() - changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeHypervisorReEnabled, - Status: metav1.ConditionTrue, - Message: "Hypervisor re-enabled successfully", - Reason: kvmv1.ConditionReasonSucceeded, - }) - if changed { - return r.updateStatus(ctx, eviction, base) - } else { - return nil - } - } -} - -// disableHypervisor disables the hypervisor service and adds a finalizer to the eviction -// will add Condition HypervisorDisabled to the eviction status with the outcome -func (r *EvictionReconciler) disableHypervisor(ctx context.Context, hypervisor *hypervisors.Hypervisor, eviction *kvmv1.Eviction) error { - evictionReason := r.evictionReason(eviction) - disabledReason := hypervisor.Service.DisabledReason - - if disabledReason != "" && disabledReason != evictionReason { - base := eviction.DeepCopy() - // Disabled for another reason already - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeHypervisorDisabled, - Status: metav1.ConditionTrue, - Message: fmt.Sprintf("Hypervisor already disabled for reason %q", disabledReason), - Reason: kvmv1.ConditionReasonSucceeded, - }) - return r.updateStatus(ctx, eviction, base) - } - - if !controllerutil.ContainsFinalizer(eviction, evictionFinalizerName) { - base := eviction.DeepCopy() - controllerutil.AddFinalizer(eviction, evictionFinalizerName) - return r.Patch(ctx, eviction, client.MergeFromWithOptions(base, client.MergeFromWithOptimisticLock{}), client.FieldOwner(EvictionControllerName)) - } - - disableService := services.UpdateOpts{Status: services.ServiceDisabled, - DisabledReason: r.evictionReason(eviction)} - - _, err := services.Update(ctx, r.computeClient, hypervisor.Service.ID, disableService).Extract() - base := eviction.DeepCopy() - if err != nil { - // We expect OpenStack calls to be transient errors, so we retry for the next reconciliation - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeHypervisorDisabled, - Status: metav1.ConditionFalse, - Message: fmt.Sprintf("Failed to disable hypervisor: %v", err), - Reason: kvmv1.ConditionReasonFailed, - }) - return r.updateStatus(ctx, eviction, base) - } - - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeHypervisorDisabled, - Status: metav1.ConditionTrue, - Message: "Hypervisor disabled successfully", - Reason: kvmv1.ConditionReasonSucceeded, - }) - return r.updateStatus(ctx, eviction, base) -} - func (r *EvictionReconciler) liveMigrate(ctx context.Context, uuid string, eviction *kvmv1.Eviction) error { log := logger.FromContext(ctx) diff --git a/internal/controller/eviction_controller_test.go b/internal/controller/eviction_controller_test.go index 3b9e45f5..96c8a429 100644 --- a/internal/controller/eviction_controller_test.go +++ b/internal/controller/eviction_controller_test.go @@ -25,6 +25,7 @@ import ( "github.com/gophercloud/gophercloud/v2/testhelper/client" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + gomegatypes "github.com/onsi/gomega/types" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -186,12 +187,19 @@ var _ = Describe("Eviction Controller", func() { }) hypervisor.Status.HypervisorID = hypervisorId + hypervisor.Status.ServiceID = serviceId meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{ Type: kvmv1.ConditionTypeOnboarding, Status: metav1.ConditionTrue, Reason: "dontcare", Message: "dontcare", }) + meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{ + Type: kvmv1.ConditionTypeHypervisorDisabled, + Status: metav1.ConditionTrue, + Reason: "dontcare", + Message: "dontcare", + }) Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed()) By("creating the eviction") @@ -255,57 +263,32 @@ var _ = Describe("Eviction Controller", func() { }) }) It("should succeed the reconciliation", func(ctx SpecContext) { - runningCond := &metav1.Condition{ - Type: kvmv1.ConditionTypeEvicting, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonRunning, - Message: "Running", - } - - hypervisorDisabledCond := &metav1.Condition{ - Type: kvmv1.ConditionTypeHypervisorDisabled, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonSucceeded, - Message: "Hypervisor disabled successfully", - } - - preflightCond := &metav1.Condition{ - Type: kvmv1.ConditionTypePreflight, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonSucceeded, - Message: "Preflight checks passed", - } - - expectations := []struct { - conditions []*metav1.Condition - finalizers []string - }{ + runningCond := SatisfyAll( + HaveField("Type", kvmv1.ConditionTypeEvicting), + HaveField("Status", metav1.ConditionTrue), + HaveField("Reason", kvmv1.ConditionReasonRunning), + HaveField("Message", "Running"), + ) + + preflightCond := SatisfyAll( + HaveField("Type", kvmv1.ConditionTypePreflight), + HaveField("Status", metav1.ConditionTrue), + HaveField("Reason", kvmv1.ConditionReasonSucceeded), + HaveField("Message", ContainSubstring("Preflight checks passed")), + ) + + expectations := []gomegatypes.GomegaMatcher{ // 1. expect the Condition Evicting to be true - {conditions: []*metav1.Condition{runningCond}, finalizers: nil}, - - // 2. expect the Finalizer to be added - {conditions: []*metav1.Condition{runningCond}, finalizers: []string{evictionFinalizerName}}, - - // 3. expect the hypervisor to be disabled - { - conditions: []*metav1.Condition{runningCond, hypervisorDisabledCond}, - finalizers: []string{evictionFinalizerName}, - }, - - // 4. expect the preflight condition to be set to succeeded - { - conditions: []*metav1.Condition{runningCond, hypervisorDisabledCond, preflightCond}, - finalizers: []string{evictionFinalizerName}, - }, - - // 5. expect the eviction condition to be set to succeeded - { - conditions: []*metav1.Condition{{ - Type: kvmv1.ConditionTypeEvicting, - Status: metav1.ConditionFalse, - Reason: kvmv1.ConditionReasonSucceeded, - Message: "eviction completed successfully"}}, - finalizers: []string{evictionFinalizerName}}, + ContainElements(runningCond), + // 2. expect the preflight condition to be set to succeeded + ContainElements(runningCond, preflightCond), + // 3. expect the eviction condition to be set to succeeded + ContainElements(SatisfyAll( + HaveField("Type", kvmv1.ConditionTypeEvicting), + HaveField("Status", metav1.ConditionFalse), + HaveField("Reason", kvmv1.ConditionReasonSucceeded), + HaveField("Message", ContainSubstring("eviction completed successfully")), + )), } for i, expectation := range expectations { @@ -319,15 +302,7 @@ var _ = Describe("Eviction Controller", func() { Expect(k8sClient.Get(ctx, typeNamespacedName, resource)).NotTo(HaveOccurred()) // Check the condition - for _, expect := range expectation.conditions { - reconcileStatus := meta.FindStatusCondition(resource.Status.Conditions, expect.Type) - Expect(reconcileStatus).NotTo(BeNil()) - Expect(reconcileStatus.Status).To(Equal(expect.Status)) - Expect(reconcileStatus.Reason).To(Equal(expect.Reason)) - Expect(reconcileStatus.Message).To(ContainSubstring(expect.Message)) - } - // Check finalizers - Expect(resource.GetFinalizers()).To(Equal(expectation.finalizers)) + Expect(resource.Status.Conditions).To(expectation) } }) }) @@ -349,8 +324,9 @@ var _ = Describe("Eviction Controller", func() { Expect(err).To(Succeed()) }) }) + It("should succeed the reconciliation", func(ctx SpecContext) { - for range 3 { + for range 1 { _, err := controllerReconciler.Reconcile(ctx, reconcileRequest) Expect(err).NotTo(HaveOccurred()) } @@ -360,27 +336,29 @@ var _ = Describe("Eviction Controller", func() { Expect(err).NotTo(HaveOccurred()) // expect eviction condition to be true - reconcileStatus := meta.FindStatusCondition(resource.Status.Conditions, kvmv1.ConditionTypeEvicting) - Expect(reconcileStatus).NotTo(BeNil()) - Expect(reconcileStatus.Status).To(Equal(metav1.ConditionTrue)) - Expect(reconcileStatus.Reason).To(Equal(kvmv1.ConditionReasonRunning)) + Expect(resource.Status.Conditions).To(ContainElement( + SatisfyAll( + HaveField("Type", kvmv1.ConditionTypeEvicting), + HaveField("Status", metav1.ConditionTrue), + HaveField("Reason", kvmv1.ConditionReasonRunning), + ), + )) - // expect hypervisor disabled condition to be true for reason of already disabled - reconcileStatus = meta.FindStatusCondition(resource.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) - Expect(reconcileStatus.Message).To(ContainSubstring("already disabled")) - - _, err = controllerReconciler.Reconcile(ctx, reconcileRequest) - Expect(err).NotTo(HaveOccurred()) + for range 3 { + _, err = controllerReconciler.Reconcile(ctx, reconcileRequest) + Expect(err).NotTo(HaveOccurred()) + } err = k8sClient.Get(ctx, typeNamespacedName, resource) Expect(err).NotTo(HaveOccurred()) // expect reconciliation to be successfully finished - reconcileStatus = meta.FindStatusCondition(resource.Status.Conditions, kvmv1.ConditionTypeEvicting) - Expect(reconcileStatus).NotTo(BeNil()) - Expect(reconcileStatus.Status).To(Equal(metav1.ConditionFalse)) - Expect(reconcileStatus.Reason).To(Equal(kvmv1.ConditionReasonSucceeded)) - - Expect(resource.GetFinalizers()).To(BeEmpty()) + Expect(resource.Status.Conditions).To(ContainElement( + SatisfyAll( + HaveField("Type", kvmv1.ConditionTypeEvicting), + HaveField("Status", metav1.ConditionFalse), + HaveField("Reason", kvmv1.ConditionReasonSucceeded), + ), + )) }) }) })