From 2bb06e40a8a111212c1001f3f6968bccacfd4aa2 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 19 May 2026 11:21:00 +0200 Subject: [PATCH 1/3] Add ConditionsFromStatus and SetApplyConfigurationStatusCondition helpers Introduces SSA-friendly helpers to convert []metav1.Condition into a slice of ConditionApplyConfiguration and to set a condition by type while mirroring meta.SetStatusCondition's LastTransitionTime semantics. Foundation commit for migrating controllers to server-side apply; no controller currently uses them. Extracted from 0b757a84915f4ccdfa11c6dbd63eb10a3bfba582. --- internal/utils/conditions.go | 116 +++++++++++ internal/utils/conditions_test.go | 308 ++++++++++++++++++++++++++++++ 2 files changed, 424 insertions(+) create mode 100644 internal/utils/conditions.go create mode 100644 internal/utils/conditions_test.go diff --git a/internal/utils/conditions.go b/internal/utils/conditions.go new file mode 100644 index 00000000..2eef3a24 --- /dev/null +++ b/internal/utils/conditions.go @@ -0,0 +1,116 @@ +/* +SPDX-FileCopyrightText: Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" +) + +// ConditionsFromStatus converts a []metav1.Condition (as stored in a status +// subresource) into a []k8sacmetav1.ConditionApplyConfiguration suitable for +// use with server-side apply. All fields including LastTransitionTime are +// preserved verbatim. +func ConditionsFromStatus(conditions []metav1.Condition) []k8sacmetav1.ConditionApplyConfiguration { + result := make([]k8sacmetav1.ConditionApplyConfiguration, len(conditions)) + for i := range conditions { + c := &conditions[i] + result[i] = k8sacmetav1.ConditionApplyConfiguration{ + Type: &c.Type, + Status: &c.Status, + Reason: &c.Reason, + Message: &c.Message, + LastTransitionTime: &c.LastTransitionTime, + } + } + return result +} + +// SetApplyConfigurationStatusCondition sets the corresponding condition in +// conditions to newCondition and returns true if the conditions are changed by +// this call. It mirrors the behaviour of k8s.io/apimachinery/pkg/api/meta.SetStatusCondition: +// +// 1. If a condition of the specified type does not exist, newCondition is +// appended. LastTransitionTime is set to now if not provided. +// 2. If a condition of the specified type already exists, all fields are +// updated. LastTransitionTime is updated to now (or the provided value) +// only when Status changes; otherwise the existing LastTransitionTime is +// preserved. +func SetApplyConfigurationStatusCondition(conditions *[]k8sacmetav1.ConditionApplyConfiguration, newCondition k8sacmetav1.ConditionApplyConfiguration) (changed bool) { + if conditions == nil { + return false + } + + // Find existing entry by type + var existing *k8sacmetav1.ConditionApplyConfiguration + for i := range *conditions { + if (*conditions)[i].Type != nil && *(*conditions)[i].Type == *newCondition.Type { + existing = &(*conditions)[i] + break + } + } + + if existing == nil { + // New condition: set LastTransitionTime if not provided + if newCondition.LastTransitionTime == nil { + now := metav1.Now() + newCondition.LastTransitionTime = &now + } + *conditions = append(*conditions, newCondition) + return true + } + + // Existing condition: update fields, handle LastTransitionTime + statusChanged := existing.Status == nil || newCondition.Status == nil || + *existing.Status != *newCondition.Status + + if statusChanged { + existing.Status = newCondition.Status + if newCondition.LastTransitionTime != nil { + existing.LastTransitionTime = newCondition.LastTransitionTime + } else { + now := metav1.Now() + existing.LastTransitionTime = &now + } + changed = true + } + // When status is unchanged, LastTransitionTime is intentionally preserved. + + if strChanged(&existing.Reason, newCondition.Reason) { + existing.Reason = newCondition.Reason + changed = true + } + if strChanged(&existing.Message, newCondition.Message) { + existing.Message = newCondition.Message + changed = true + } + + return changed +} + +// strChanged reports whether the pointer value of b differs from the current +// value at a. +func strChanged(a **string, b *string) bool { + if *a == nil && b == nil { + return false + } + if *a == nil || b == nil { + return true + } + return **a != *b +} diff --git a/internal/utils/conditions_test.go b/internal/utils/conditions_test.go new file mode 100644 index 00000000..fcca9447 --- /dev/null +++ b/internal/utils/conditions_test.go @@ -0,0 +1,308 @@ +/* +SPDX-FileCopyrightText: Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils_test + +import ( + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" + + "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" +) + +// ptr returns a pointer to v; used to construct ConditionApplyConfiguration literals. +func ptr[T any](v T) *T { return &v } + +// --- ConditionsFromStatus --- + +func TestConditionsFromStatus_Empty(t *testing.T) { + result := utils.ConditionsFromStatus(nil) + if len(result) != 0 { + t.Fatalf("expected empty slice, got %d elements", len(result)) + } + + result = utils.ConditionsFromStatus([]metav1.Condition{}) + if len(result) != 0 { + t.Fatalf("expected empty slice, got %d elements", len(result)) + } +} + +func TestConditionsFromStatus_ConvertsAllFields(t *testing.T) { + ts := metav1.NewTime(time.Date(2024, 1, 2, 3, 4, 5, 0, time.UTC)) + input := []metav1.Condition{ + { + Type: "Ready", + Status: metav1.ConditionTrue, + Reason: "AllGood", + Message: "everything is fine", + LastTransitionTime: ts, + ObservedGeneration: 7, + }, + } + + result := utils.ConditionsFromStatus(input) + if len(result) != 1 { + t.Fatalf("expected 1 element, got %d", len(result)) + } + + c := result[0] + if c.Type == nil || *c.Type != "Ready" { + t.Errorf("Type: got %v, want Ready", c.Type) + } + if c.Status == nil || *c.Status != metav1.ConditionTrue { + t.Errorf("Status: got %v, want True", c.Status) + } + if c.Reason == nil || *c.Reason != "AllGood" { + t.Errorf("Reason: got %v, want AllGood", c.Reason) + } + if c.Message == nil || *c.Message != "everything is fine" { + t.Errorf("Message: got %v, want 'everything is fine'", c.Message) + } + if c.LastTransitionTime == nil || !c.LastTransitionTime.Equal(&ts) { + t.Errorf("LastTransitionTime: got %v, want %v", c.LastTransitionTime, ts) + } +} + +func TestConditionsFromStatus_MultipleConditions(t *testing.T) { + input := []metav1.Condition{ + {Type: "A", Status: metav1.ConditionTrue, Reason: "r", Message: "m", LastTransitionTime: metav1.Now()}, + {Type: "B", Status: metav1.ConditionFalse, Reason: "r2", Message: "m2", LastTransitionTime: metav1.Now()}, + } + result := utils.ConditionsFromStatus(input) + if len(result) != 2 { + t.Fatalf("expected 2 elements, got %d", len(result)) + } + if result[0].Type == nil || *result[0].Type != "A" { + t.Errorf("first condition type: got %v, want A", result[0].Type) + } + if result[1].Type == nil || *result[1].Type != "B" { + t.Errorf("second condition type: got %v, want B", result[1].Type) + } +} + +// --- SetApplyConfigurationStatusCondition --- + +func TestSetApplyConfigurationStatusCondition_NilSlicePointer(t *testing.T) { + changed := utils.SetApplyConfigurationStatusCondition(nil, + *k8sacmetav1.Condition().WithType("T").WithStatus(metav1.ConditionTrue).WithReason("R").WithMessage("M")) + if changed { + t.Error("expected false for nil conditions pointer") + } +} + +func TestSetApplyConfigurationStatusCondition_AppendNew(t *testing.T) { + var conditions []k8sacmetav1.ConditionApplyConfiguration + before := time.Now() + + changed := utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("AllGood"). + WithMessage("fine")) + + after := time.Now() + + if !changed { + t.Error("expected changed=true for new condition") + } + if len(conditions) != 1 { + t.Fatalf("expected 1 condition, got %d", len(conditions)) + } + c := conditions[0] + if c.Type == nil || *c.Type != "Ready" { + t.Errorf("Type: got %v", c.Type) + } + if c.Status == nil || *c.Status != metav1.ConditionTrue { + t.Errorf("Status: got %v", c.Status) + } + if c.LastTransitionTime == nil { + t.Fatal("LastTransitionTime must be set for a new condition") + } + ltt := c.LastTransitionTime.Time + if ltt.Before(before) || ltt.After(after) { + t.Errorf("LastTransitionTime %v not in [%v, %v]", ltt, before, after) + } +} + +func TestSetApplyConfigurationStatusCondition_AppendNew_ExplicitLastTransitionTime(t *testing.T) { + var conditions []k8sacmetav1.ConditionApplyConfiguration + ts := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)) + + utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("R"). + WithMessage("M"). + WithLastTransitionTime(ts)) + + if conditions[0].LastTransitionTime == nil || !conditions[0].LastTransitionTime.Equal(&ts) { + t.Errorf("expected explicit LastTransitionTime %v, got %v", ts, conditions[0].LastTransitionTime) + } +} + +func TestSetApplyConfigurationStatusCondition_StatusUnchanged_PreservesLastTransitionTime(t *testing.T) { + ts := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)) + conditions := []k8sacmetav1.ConditionApplyConfiguration{ + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("OldReason"). + WithMessage("old message"). + WithLastTransitionTime(ts), + } + + changed := utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("NewReason"). + WithMessage("new message")) + + if !changed { + t.Error("expected changed=true because Reason/Message changed") + } + c := conditions[0] + if c.Reason == nil || *c.Reason != "NewReason" { + t.Errorf("Reason not updated: got %v", c.Reason) + } + if c.Message == nil || *c.Message != "new message" { + t.Errorf("Message not updated: got %v", c.Message) + } + // LastTransitionTime must NOT have changed + if c.LastTransitionTime == nil || !c.LastTransitionTime.Equal(&ts) { + t.Errorf("LastTransitionTime must be preserved when status unchanged: got %v, want %v", + c.LastTransitionTime, ts) + } +} + +func TestSetApplyConfigurationStatusCondition_StatusUnchanged_NoFieldChange_ReturnsFalse(t *testing.T) { + ts := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)) + conditions := []k8sacmetav1.ConditionApplyConfiguration{ + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("R"). + WithMessage("M"). + WithLastTransitionTime(ts), + } + + changed := utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("R"). + WithMessage("M")) + + if changed { + t.Error("expected changed=false when nothing changed") + } +} + +func TestSetApplyConfigurationStatusCondition_StatusChanged_UpdatesLastTransitionTime(t *testing.T) { + old := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)) + conditions := []k8sacmetav1.ConditionApplyConfiguration{ + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("R"). + WithMessage("M"). + WithLastTransitionTime(old), + } + + before := time.Now() + changed := utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionFalse). + WithReason("Broken"). + WithMessage("it broke")) + after := time.Now() + + if !changed { + t.Error("expected changed=true for status change") + } + c := conditions[0] + if c.Status == nil || *c.Status != metav1.ConditionFalse { + t.Errorf("Status not updated: got %v", c.Status) + } + if c.LastTransitionTime == nil { + t.Fatal("LastTransitionTime must be set after status change") + } + ltt := c.LastTransitionTime.Time + if ltt.Before(before) || ltt.After(after) { + t.Errorf("LastTransitionTime %v not in [%v, %v]", ltt, before, after) + } +} + +func TestSetApplyConfigurationStatusCondition_StatusChanged_ExplicitLastTransitionTime(t *testing.T) { + old := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)) + explicit := metav1.NewTime(time.Date(2021, 6, 15, 0, 0, 0, 0, time.UTC)) + conditions := []k8sacmetav1.ConditionApplyConfiguration{ + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("R"). + WithMessage("M"). + WithLastTransitionTime(old), + } + + utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionFalse). + WithReason("R"). + WithMessage("M"). + WithLastTransitionTime(explicit)) + + if conditions[0].LastTransitionTime == nil || !conditions[0].LastTransitionTime.Equal(&explicit) { + t.Errorf("expected explicit LastTransitionTime %v, got %v", explicit, conditions[0].LastTransitionTime) + } +} + +func TestSetApplyConfigurationStatusCondition_OtherConditionsUntouched(t *testing.T) { + ts := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)) + conditions := []k8sacmetav1.ConditionApplyConfiguration{ + *k8sacmetav1.Condition().WithType("Other").WithStatus(metav1.ConditionTrue). + WithReason("R").WithMessage("M").WithLastTransitionTime(ts), + *k8sacmetav1.Condition().WithType("Ready").WithStatus(metav1.ConditionTrue). + WithReason("R").WithMessage("M").WithLastTransitionTime(ts), + } + + utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionFalse). + WithReason("Broken"). + WithMessage("broke")) + + // "Other" must be completely unchanged + other := conditions[0] + if other.Type == nil || *other.Type != "Other" { + t.Errorf("Other condition type changed: %v", other.Type) + } + if other.Status == nil || *other.Status != metav1.ConditionTrue { + t.Errorf("Other condition status changed: %v", other.Status) + } + if other.LastTransitionTime == nil || !other.LastTransitionTime.Equal(&ts) { + t.Errorf("Other condition LastTransitionTime changed: %v", other.LastTransitionTime) + } +} From 9458d90773e897043252168d90cdc42f6c41d62a Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 5 May 2026 17:40:34 +0200 Subject: [PATCH 2/3] Migrate OnboardingController to SSA status updates Replace PatchHypervisorStatusWithRetry with Status().Apply(). The Nova ID lookup is inlined into Reconcile and combined with the initial condition in a single apply, avoiding two applies from the same field manager in one reconcile cycle which would cause SSA to prune the IDs. HypervisorID and ServiceID are included in every subsequent apply to retain ownership. --- internal/controller/onboarding_controller.go | 264 ++++++++----------- 1 file changed, 112 insertions(+), 152 deletions(-) diff --git a/internal/controller/onboarding_controller.go b/internal/controller/onboarding_controller.go index 7e9fed71..3c0abd6d 100644 --- a/internal/controller/onboarding_controller.go +++ b/internal/controller/onboarding_controller.go @@ -26,11 +26,11 @@ import ( "time" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" "k8s.io/utils/clock" ctrl "sigs.k8s.io/controller-runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -45,6 +45,7 @@ import ( "github.com/gophercloud/utils/v2/openstack/clientconfig" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apiv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/applyconfigurations/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) @@ -99,28 +100,30 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request) // We bail here out, because the openstack api is not the best to poll if hv.Status.HypervisorID == "" || hv.Status.ServiceID == "" { - if err := r.ensureNovaProperties(ctx, hv); err != nil { + hypervisorID, serviceID, err := r.lookupNovaProperties(ctx, hv) + if err != nil { if errors.Is(err, errRequeue) { return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } return ctrl.Result{}, err } + statusCfg := apiv1.HypervisorStatus(). + WithHypervisorID(hypervisorID). + WithServiceID(serviceID) + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonInitial). + WithMessage("Initial onboarding")) + return ctrl.Result{}, r.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(OnboardingControllerName)) } // check condition reason status := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) - if status == nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonInitial, - Message: "Initial onboarding", - } - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) - } - switch status.Reason { case kvmv1.ConditionReasonInitial: return ctrl.Result{}, r.initialOnboarding(ctx, hv) @@ -138,6 +141,19 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request) } } +// applyOnboardingCondition applies a single onboarding condition via SSA, +// carrying all existing conditions and scalar fields forward. +func (r *OnboardingController) applyOnboardingCondition(ctx context.Context, hv *kvmv1.Hypervisor, cond k8sacmetav1.ConditionApplyConfiguration) error { + statusCfg := apiv1.HypervisorStatus(). + WithHypervisorID(hv.Status.HypervisorID). + WithServiceID(hv.Status.ServiceID) + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, cond) + return r.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(OnboardingControllerName)) +} + func (r *OnboardingController) abortOnboarding(ctx context.Context, hv *kvmv1.Hypervisor, computeHost, message string) error { status := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) // Never onboarded @@ -145,42 +161,28 @@ func (r *OnboardingController) abortOnboarding(ctx context.Context, hv *kvmv1.Hy return nil } - base := hv.DeepCopy() - - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionFalse, - Reason: kvmv1.ConditionReasonAborted, - Message: message, - }) - - if equality.Semantic.DeepEqual(hv, base) { - // Already aborted + // Already aborted with the same message — nothing to do + if status.Status == metav1.ConditionFalse && + status.Reason == kvmv1.ConditionReasonAborted && + status.Message == message { return nil } - condition := *meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) if err := r.deleteTestServers(ctx, computeHost); err != nil { - condition = metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, // No cleanup, so we are still "onboarding" - Reason: kvmv1.ConditionReasonAborted, - Message: err.Error(), - } - - meta.SetStatusCondition(&hv.Status.Conditions, condition) - if equality.Semantic.DeepEqual(hv, base) { - return err - } - - return errors.Join(err, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - })) - } - - return utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) + return errors.Join(err, r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). // No cleanup, so we are still "onboarding" + WithReason(kvmv1.ConditionReasonAborted). + WithMessage(err.Error()))) + } + + return r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonAborted). + WithMessage(message)) } func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1.Hypervisor) error { @@ -190,7 +192,6 @@ func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1. } // Wait for aggregates controller to apply the desired state (zone and test aggregate) - // Extract aggregate names for comparison currentAggregateNames := make([]string, len(hv.Status.Aggregates)) for i, agg := range hv.Status.Aggregates { currentAggregateNames[i] = agg.Name @@ -212,15 +213,12 @@ func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1. return result.Err } - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: "Running onboarding tests", - } - return utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) + return r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage("Running onboarding tests")) } func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervisor, host string) (ctrl.Result, error) { @@ -237,21 +235,17 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis id := server.ID server, err = servers.Get(ctx, r.testComputeClient, id).Extract() if err != nil { - // should not happened log.Error(err, "failed to get test instance, instance vanished", "id", id) return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } // Set condition back to testing - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: "Server ended up in error state: " + server.Fault.Message, - } - if err = utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err != nil { + if err = r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage("Server ended up in error state: "+server.Fault.Message)); err != nil { return ctrl.Result{}, err } @@ -261,37 +255,32 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil + case "ACTIVE": consoleOutput, err := servers. ShowConsoleOutput(ctx, r.testComputeClient, server.ID, servers.ShowConsoleOutputOpts{Length: 11}). Extract() if err != nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: fmt.Sprintf("could not get console output %v", err), - } - if err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err != nil { - return ctrl.Result{}, err + if err2 := r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage(fmt.Sprintf("could not get console output %v", err))); err2 != nil { + return ctrl.Result{}, err2 } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } if !strings.Contains(consoleOutput, server.Name) { if !server.LaunchedAt.IsZero() && r.Clock.Now().After(server.LaunchedAt.Add(smokeTestTimeout)) { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: fmt.Sprintf("timeout waiting for console output since %v", server.LaunchedAt), - } - if err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err != nil { - return ctrl.Result{}, err + if err2 := r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage(fmt.Sprintf("timeout waiting for console output since %v", server.LaunchedAt))); err2 != nil { + return ctrl.Result{}, err2 } if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil { if !gophercloud.ResponseCodeIs(err, http.StatusNotFound) { @@ -303,28 +292,25 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis } if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: fmt.Sprintf("failed to terminate instance %v", err), - } - if err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err != nil { - return ctrl.Result{}, err + if err2 := r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage(fmt.Sprintf("failed to terminate instance %v", err))); err2 != nil { + return ctrl.Result{}, err2 } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } return r.completeOnboarding(ctx, host, hv) + default: return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } } func (r *OnboardingController) completeOnboarding(ctx context.Context, host string, hv *kvmv1.Hypervisor) (ctrl.Result, error) { - // Check if we're in the RemovingTestAggregate phase onboardingCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) if onboardingCondition != nil && onboardingCondition.Reason == kvmv1.ConditionReasonHandover { // We're waiting for aggregates and traits controllers to sync @@ -340,44 +326,32 @@ func (r *OnboardingController) completeOnboarding(ctx context.Context, host stri return ctrl.Result{}, nil } - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionFalse, - Reason: kvmv1.ConditionReasonSucceeded, - Message: "Onboarding completed", - } - - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) + return ctrl.Result{}, r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonSucceeded). + WithMessage("Onboarding completed")) } // First time in completeOnboarding - clean up and prepare for aggregate sync - err := r.deleteTestServers(ctx, host) - if err != nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, // No cleanup, so we are still "onboarding" - Reason: kvmv1.ConditionReasonAborted, - Message: err.Error(), - } - return ctrl.Result{}, errors.Join(err, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - })) + if err := r.deleteTestServers(ctx, host); err != nil { + return ctrl.Result{}, errors.Join(err, r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). // No cleanup, so we are still "onboarding" + WithReason(kvmv1.ConditionReasonAborted). + WithMessage(err.Error()))) } // Mark onboarding as almost complete, triggers other controllers to do their part - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonHandover, - Message: "Waiting for other controllers to take over", - } - // Patch status to signal aggregates controller - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) + return ctrl.Result{}, r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonHandover). + WithMessage("Waiting for other controllers to take over")) } func (r *OnboardingController) deleteTestServers(ctx context.Context, host string) error { @@ -409,10 +383,10 @@ func (r *OnboardingController) deleteTestServers(ctx context.Context, host strin return errors.Join(errs...) } -func (r *OnboardingController) ensureNovaProperties(ctx context.Context, hv *kvmv1.Hypervisor) error { +func (r *OnboardingController) lookupNovaProperties(ctx context.Context, hv *kvmv1.Hypervisor) (hypervisorID, serviceID string, err error) { hypervisorAddress := hv.Labels[corev1.LabelHostname] if hypervisorAddress == "" { - return errRequeue + return "", "", errRequeue } shortHypervisorAddress := strings.SplitN(hypervisorAddress, ".", 2)[0] @@ -421,41 +395,27 @@ func (r *OnboardingController) ensureNovaProperties(ctx context.Context, hv *kvm hypervisorPages, err := hypervisors.List(r.computeClient, hypervisorQuery).AllPages(ctx) if err != nil { if gophercloud.ResponseCodeIs(err, http.StatusNotFound) { - return errRequeue + return "", "", errRequeue } - return err + return "", "", err } hs, err := hypervisors.ExtractHypervisors(hypervisorPages) if err != nil { - return err + return "", "", err } if len(hs) < 1 { - return errRequeue + return "", "", errRequeue } - var found = false - var myHypervisor hypervisors.Hypervisor for _, h := range hs { - short := strings.SplitN(h.HypervisorHostname, ".", 2)[0] - if short == shortHypervisorAddress { - myHypervisor = h - found = true - break + if strings.SplitN(h.HypervisorHostname, ".", 2)[0] == shortHypervisorAddress { + return h.ID, h.Service.ID, nil } } - if !found { - return fmt.Errorf("could not find exact match for %v", shortHypervisorAddress) - } - - hypervisorID := myHypervisor.ID - serviceID := myHypervisor.Service.ID - return utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - h.Status.HypervisorID = hypervisorID - h.Status.ServiceID = serviceID - }) + return "", "", fmt.Errorf("could not find exact match for %v", shortHypervisorAddress) } func (r *OnboardingController) createOrGetTestServer(ctx context.Context, zone, computeHost string, nodeUid types.UID) (*servers.Server, error) { @@ -476,9 +436,9 @@ func (r *OnboardingController) createOrGetTestServer(ctx context.Context, zone, } log := logger.FromContext(ctx) + var foundServer *servers.Server // Cleanup all other server with the same test prefix, except for the exact match // as the cleanup after onboarding may leak resources - var foundServer *servers.Server for _, server := range serverList { // The query is a substring search, we are looking for a prefix if !strings.HasPrefix(server.Name, serverPrefix) { From 311d8e22d068cfefd538d53c340aafdf6163d041 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 19 May 2026 13:44:27 +0200 Subject: [PATCH 3/3] Onboarding: keep retrying after instance creation/error failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the test instance creation in the onboarding smoke test fails, the controller used to return a bare error from Reconcile and never updated the Onboarding condition. Operators saw a stale message frozen for days while the loop kept silently retrying — combined with the SSA condition helper preserving LastTransitionTime when Status does not change, the resource looked stuck even though reconciliation was healthy. This change: - In smokeTest, when createOrGetTestServer fails, set the Onboarding condition to Testing with the current error message and requeue with defaultWaitTime instead of returning the error. This makes each retry observable in the resource itself and matches the style used by every other smokeTest error path. - Include the failed server's UUID in the ERROR-state, console-output-failure, timeout, and terminate-failure messages. A fresh attempt allocates a new instance UUID, so each retry yields a different message even when Nova keeps returning the same fault text preventing SetApplyConfigurationStatusCondition from treating successive failures as no-op updates. - Tests: add Ginkgo specs covering POST /servers returning 500 and a server in ERROR state, asserting the condition message, the requeue, the absence of a returned error, and (for ERROR) that the delete is issued and the UUID is in the message. --- internal/controller/onboarding_controller.go | 27 +++- .../controller/onboarding_controller_test.go | 121 +++++++++++++++++- 2 files changed, 140 insertions(+), 8 deletions(-) diff --git a/internal/controller/onboarding_controller.go b/internal/controller/onboarding_controller.go index 3c0abd6d..c0db7358 100644 --- a/internal/controller/onboarding_controller.go +++ b/internal/controller/onboarding_controller.go @@ -226,7 +226,19 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis zone := hv.Labels[corev1.LabelTopologyZone] server, err := r.createOrGetTestServer(ctx, zone, host, hv.UID) if err != nil { - return ctrl.Result{}, fmt.Errorf("failed to create or get test instance: %w", err) + // Surface the failure in the Onboarding condition so the user can see + // the latest error and confirm the controller is still retrying. + // Returning a bare error here would leave the previously-set message + // in place and the resource looks frozen even though we keep retrying. + if err2 := r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage(fmt.Sprintf("failed to create or get test instance: %v", err))); err2 != nil { + return ctrl.Result{}, err2 + } + return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } switch server.Status { @@ -239,13 +251,16 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } - // Set condition back to testing + // Set condition back to testing. Include the server ID so each retry + // (which creates a fresh instance with a new UUID) yields a different + // message — otherwise SetApplyConfigurationStatusCondition would treat + // the update as a no-op when Nova keeps reporting the same fault text. if err = r.applyOnboardingCondition(ctx, hv, *k8sacmetav1.Condition(). WithType(kvmv1.ConditionTypeOnboarding). WithStatus(metav1.ConditionTrue). WithReason(kvmv1.ConditionReasonTesting). - WithMessage("Server ended up in error state: "+server.Fault.Message)); err != nil { + WithMessage(fmt.Sprintf("Server %s ended up in error state: %s", id, server.Fault.Message))); err != nil { return ctrl.Result{}, err } @@ -266,7 +281,7 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis WithType(kvmv1.ConditionTypeOnboarding). WithStatus(metav1.ConditionTrue). WithReason(kvmv1.ConditionReasonTesting). - WithMessage(fmt.Sprintf("could not get console output %v", err))); err2 != nil { + WithMessage(fmt.Sprintf("could not get console output for server %s: %v", server.ID, err))); err2 != nil { return ctrl.Result{}, err2 } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil @@ -279,7 +294,7 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis WithType(kvmv1.ConditionTypeOnboarding). WithStatus(metav1.ConditionTrue). WithReason(kvmv1.ConditionReasonTesting). - WithMessage(fmt.Sprintf("timeout waiting for console output since %v", server.LaunchedAt))); err2 != nil { + WithMessage(fmt.Sprintf("timeout waiting for console output of server %s since %v", server.ID, server.LaunchedAt))); err2 != nil { return ctrl.Result{}, err2 } if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil { @@ -297,7 +312,7 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis WithType(kvmv1.ConditionTypeOnboarding). WithStatus(metav1.ConditionTrue). WithReason(kvmv1.ConditionReasonTesting). - WithMessage(fmt.Sprintf("failed to terminate instance %v", err))); err2 != nil { + WithMessage(fmt.Sprintf("failed to terminate instance %s: %v", server.ID, err))); err2 != nil { return ctrl.Result{}, err2 } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil diff --git a/internal/controller/onboarding_controller_test.go b/internal/controller/onboarding_controller_test.go index 8c0003d0..3afd6129 100644 --- a/internal/controller/onboarding_controller_test.go +++ b/internal/controller/onboarding_controller_test.go @@ -336,6 +336,7 @@ var _ = Describe("Onboarding Controller", func() { Context("running tests after initial setup", func() { var ( serverActionHandler func(http.ResponseWriter, *http.Request) + serverCreateHandler func(http.ResponseWriter, *http.Request) serverDeleteHandler func(http.ResponseWriter, *http.Request) serverDetailHandler func(http.ResponseWriter, *http.Request) ) @@ -397,11 +398,14 @@ var _ = Describe("Onboarding Controller", func() { Expect(err).NotTo(HaveOccurred()) }) - fakeServer.Mux.HandleFunc("POST /servers", func(w http.ResponseWriter, r *http.Request) { + serverCreateHandler = func(w http.ResponseWriter, _ *http.Request) { w.Header().Add("Content-Type", "application/json") w.WriteHeader(http.StatusOK) _, err := fmt.Fprint(w, createServerBody) Expect(err).NotTo(HaveOccurred()) + } + fakeServer.Mux.HandleFunc("POST /servers", func(w http.ResponseWriter, r *http.Request) { + serverCreateHandler(w, r) }) serverActionHandler = func(w http.ResponseWriter, _ *http.Request) { @@ -632,13 +636,126 @@ var _ = Describe("Onboarding Controller", func() { By("Verifying the timed-out server was deleted") Expect(serverDeletedCalled).To(BeTrue()) - By("Verifying the onboarding condition message indicates a timeout") + By("Verifying the onboarding condition message indicates a timeout and includes the server ID") hv := &kvmv1.Hypervisor{} Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) onboardingCond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) Expect(onboardingCond).NotTo(BeNil()) Expect(onboardingCond.Reason).To(Equal(kvmv1.ConditionReasonTesting)) Expect(onboardingCond.Message).To(ContainSubstring("timeout")) + Expect(onboardingCond.Message).To(ContainSubstring("server-id")) + }) + }) + + When("test server creation fails", func() { + BeforeEach(func(ctx SpecContext) { + By("Overriding HV status to Testing state") + hv := &kvmv1.Hypervisor{} + Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) + meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + Type: kvmv1.ConditionTypeOnboarding, + Status: metav1.ConditionTrue, + Reason: kvmv1.ConditionReasonTesting, + Message: "previously stuck on something else", + }) + Expect(k8sClient.Status().Update(ctx, hv)).To(Succeed()) + }) + + It("should surface the error in the Onboarding condition and requeue without returning an error", func(ctx SpecContext) { + By("Overriding the POST /servers handler to return 500") + serverCreateHandler = func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, err := fmt.Fprint(w, `{"computeFault": {"message": "Cinder volume backend is degraded"}}`) + Expect(err).NotTo(HaveOccurred()) + } + + By("Reconciling: createOrGetTestServer should fail at POST /servers") + result, err := onboardingReconciler.Reconcile(ctx, reconcileReq) + Expect(err).NotTo(HaveOccurred()) + Expect(result.RequeueAfter).To(Equal(defaultWaitTime)) + + By("Verifying the Onboarding condition reflects the create error") + hv := &kvmv1.Hypervisor{} + Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) + onboardingCond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) + Expect(onboardingCond).NotTo(BeNil()) + Expect(onboardingCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(onboardingCond.Reason).To(Equal(kvmv1.ConditionReasonTesting)) + Expect(onboardingCond.Message).To(ContainSubstring("failed to create or get test instance")) + }) + }) + + When("test server is in ERROR state", func() { + var ( + serverGetCalled bool + serverDeleteCalled bool + ) + + BeforeEach(func(ctx SpecContext) { + By("Overriding HV status to Testing state") + hv := &kvmv1.Hypervisor{} + Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) + meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + Type: kvmv1.ConditionTypeOnboarding, + Status: metav1.ConditionTrue, + Reason: kvmv1.ConditionReasonTesting, + Message: "Running onboarding tests", + }) + Expect(k8sClient.Status().Update(ctx, hv)).To(Succeed()) + + serverName := fmt.Sprintf("%s-%s-%s", testPrefixName, hypervisorName, string(hv.UID)) + serverGetCalled = false + serverDeleteCalled = false + + // GET /servers/detail returns a single ERROR-state server with the + // expected name so createOrGetTestServer returns it as foundServer. + serverDetailHandler = func(w http.ResponseWriter, _ *http.Request) { + _, err := fmt.Fprintf(w, + `{"servers": [{"id": "server-id", "name": %q, "status": "ERROR"}], "servers_links": []}`, + serverName) + Expect(err).NotTo(HaveOccurred()) + } + + // GET /servers/server-id returns the ERROR server with a fault + // message so smokeTest can record it. + fakeServer.Mux.HandleFunc("GET /servers/server-id", func(w http.ResponseWriter, _ *http.Request) { + serverGetCalled = true + w.Header().Add("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, err := fmt.Fprintf(w, + `{"server": {"id": "server-id", "name": %q, "status": "ERROR", "fault": {"message": "Build of instance aborted: volume creation failed"}}}`, + serverName) + Expect(err).NotTo(HaveOccurred()) + }) + + serverDeleteHandler = func(w http.ResponseWriter, _ *http.Request) { + serverDeleteCalled = true + w.WriteHeader(http.StatusAccepted) + } + }) + + It("should record the failure with the server UUID and issue a delete, without returning an error", func(ctx SpecContext) { + By("Reconciling once to enter the ERROR branch") + result, err := onboardingReconciler.Reconcile(ctx, reconcileReq) + Expect(err).NotTo(HaveOccurred()) + Expect(result.RequeueAfter).To(Equal(defaultWaitTime)) + + By("Verifying GET on the server happened so the fault could be read") + Expect(serverGetCalled).To(BeTrue()) + + By("Verifying the server delete was attempted") + Expect(serverDeleteCalled).To(BeTrue()) + + By("Verifying the Onboarding condition message includes the server UUID and the fault text") + hv := &kvmv1.Hypervisor{} + Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) + onboardingCond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) + Expect(onboardingCond).NotTo(BeNil()) + Expect(onboardingCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(onboardingCond.Reason).To(Equal(kvmv1.ConditionReasonTesting)) + Expect(onboardingCond.Message).To(ContainSubstring("server-id")) + Expect(onboardingCond.Message).To(ContainSubstring("ended up in error state")) + Expect(onboardingCond.Message).To(ContainSubstring("Build of instance aborted")) }) })