diff --git a/go.mod b/go.mod index 2a71db918..8b9b1fafc 100644 --- a/go.mod +++ b/go.mod @@ -3,13 +3,13 @@ module github.com/cobaltcore-dev/cortex go 1.26.0 require ( - github.com/cobaltcore-dev/openstack-hypervisor-operator v1.2.2 + github.com/cobaltcore-dev/openstack-hypervisor-operator v1.2.3 github.com/go-gorp/gorp v2.2.0+incompatible github.com/gophercloud/gophercloud/v2 v2.12.0 github.com/ironcore-dev/ironcore v0.3.0 github.com/prometheus/client_golang v1.23.2 github.com/prometheus/client_model v0.6.2 - github.com/sapcc/go-bits v0.0.0-20260526084158-fcb8a0bff0a3 + github.com/sapcc/go-bits v0.0.0-20260529151418-8d2b33444f03 go.xyrillian.de/gg v1.7.0 k8s.io/api v0.36.1 k8s.io/apimachinery v0.36.1 @@ -130,7 +130,7 @@ require ( k8s.io/component-base v0.36.0 // indirect k8s.io/klog/v2 v2.140.0 // indirect k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a // indirect - k8s.io/utils v0.0.0-20260319190234-28399d86e0b5 // indirect + k8s.io/utils v0.0.0-20260507154919-ff6756f316d2 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.34.0 // indirect sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect sigs.k8s.io/randfill v1.0.0 // indirect diff --git a/go.sum b/go.sum index 13a221377..90991ed62 100644 --- a/go.sum +++ b/go.sum @@ -20,8 +20,8 @@ github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1x github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cobaltcore-dev/openstack-hypervisor-operator v1.2.2 h1:qROHDCT/5iwbeUHoFSUeiHwEGaeNnOLYj0OGIlFBu5o= -github.com/cobaltcore-dev/openstack-hypervisor-operator v1.2.2/go.mod h1:vEKwzkDzZwnSd0VRnG+Q1bEzLKe0SWW1ugBAUVqrkY8= +github.com/cobaltcore-dev/openstack-hypervisor-operator v1.2.3 h1:2KjrD+LzU8pgvm1HEIEw5qlRF1gcbFp7Q/t9z+AnY4g= +github.com/cobaltcore-dev/openstack-hypervisor-operator v1.2.3/go.mod h1:DyQ6MO1TsC4kzeG3l8tnIXQVeA/PITm22RyitJhDaKI= github.com/containerd/continuity v0.4.5 h1:ZRoN1sXq9u7V6QoHMcVWGhOwDFqZ4B9i5H6un1Wh0x4= github.com/containerd/continuity v0.4.5/go.mod h1:/lNJvtJKUQStBzpVQ1+rasXO1LAWtUQssk28EZvJ3nE= github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= @@ -172,10 +172,10 @@ github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7P github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc= -github.com/onsi/ginkgo/v2 v2.28.3 h1:4JvMdwtFU0imd8fHx25OJXoDMRexnf8v5NHKYSTTji4= -github.com/onsi/ginkgo/v2 v2.28.3/go.mod h1:+aXOY+vzZ5mu2iI2HpTZUPmM//oQfsNFX6gU9kNcA44= -github.com/onsi/gomega v1.40.0 h1:Vtol0e1MghCD2ZVIilPDIg44XSL9l2QAn8ZNaljWcJc= -github.com/onsi/gomega v1.40.0/go.mod h1:M/Uqpu/8qTjtzCLUA2zJHX9Iilrau25x1PdoSRbWh5A= +github.com/onsi/ginkgo/v2 v2.29.0 h1:rfh+ZFjgJhYWRoIqVf3Uwx/W20yLrcrE2h2GmYVRaag= +github.com/onsi/ginkgo/v2 v2.29.0/go.mod h1:+aXOY+vzZ5mu2iI2HpTZUPmM//oQfsNFX6gU9kNcA44= +github.com/onsi/gomega v1.41.0 h1:OwKp4pXNgVxf6sCplzYo794OFNuoL2q2SBMU5NSWOjA= +github.com/onsi/gomega v1.41.0/go.mod h1:M/Uqpu/8qTjtzCLUA2zJHX9Iilrau25x1PdoSRbWh5A= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= @@ -204,8 +204,8 @@ github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sapcc/go-api-declarations v1.22.0 h1:nU/eJ6OO54Z9YSo1gWinD0A2etrfZObCwYdB9xA0VWE= github.com/sapcc/go-api-declarations v1.22.0/go.mod h1:x3V8bzg7Y4kmbA+DeWWwKteFEdCCSiVQdwRXj4fGAYY= -github.com/sapcc/go-bits v0.0.0-20260526084158-fcb8a0bff0a3 h1:uf2Szgyh5z4mh4pCp8ZAHdDSaUJfRiKFUr1lU1Fs3oo= -github.com/sapcc/go-bits v0.0.0-20260526084158-fcb8a0bff0a3/go.mod h1:tlX0d8TvLgEikNWwFbB1SxnW0q/6XybpXjt8mr97Qzg= +github.com/sapcc/go-bits v0.0.0-20260529151418-8d2b33444f03 h1:ni4+0WYad/128i2s3lrE2wOwzu1BkhZpWq9ZgTDsjCk= +github.com/sapcc/go-bits v0.0.0-20260529151418-8d2b33444f03/go.mod h1:tlX0d8TvLgEikNWwFbB1SxnW0q/6XybpXjt8mr97Qzg= github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw= github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= @@ -331,8 +331,8 @@ k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a h1:xCeOEAOoGYl2jnJoHkC3hk k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a/go.mod h1:uGBT7iTA6c6MvqUvSXIaYZo9ukscABYi2btjhvgKGZ0= k8s.io/streaming v0.36.1 h1:L+K68n4Gg940BGNNYtUBvL1WTLL0YnKT3s+P1MNAmR4= k8s.io/streaming v0.36.1/go.mod h1:z6fV3D+NVkoeqRMtWwlUZK6U17SY/LqNzOxWL6GyR/s= -k8s.io/utils v0.0.0-20260319190234-28399d86e0b5 h1:kBawHLSnx/mYHmRnNUf9d4CpjREbeZuxoSGOX/J+aYM= -k8s.io/utils v0.0.0-20260319190234-28399d86e0b5/go.mod h1:xDxuJ0whA3d0I4mf/C4ppKHxXynQ+fxnkmQH0vTHnuk= +k8s.io/utils v0.0.0-20260507154919-ff6756f316d2 h1:wU4tMEhLGgIbLvXQb1cfN+EcM0wf7zC6CPF+C79jroc= +k8s.io/utils v0.0.0-20260507154919-ff6756f316d2/go.mod h1:xDxuJ0whA3d0I4mf/C4ppKHxXynQ+fxnkmQH0vTHnuk= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.34.0 h1:hSfpvjjTQXQY2Fol2CS0QHMNs/WI1MOSGzCm1KhM5ec= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.34.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= sigs.k8s.io/controller-runtime v0.24.1 h1:miPEwrmirImAvgME1L9qebGHrOnGJoVmVdtOU9fRfo4= diff --git a/helm/bundles/cortex-cinder/Chart.yaml b/helm/bundles/cortex-cinder/Chart.yaml index c0c58a69d..f0c93337b 100644 --- a/helm/bundles/cortex-cinder/Chart.yaml +++ b/helm/bundles/cortex-cinder/Chart.yaml @@ -5,7 +5,7 @@ apiVersion: v2 name: cortex-cinder description: A Helm chart deploying Cortex for Cinder. type: application -version: 0.0.72 +version: 0.0.73 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex-postgres @@ -16,12 +16,12 @@ dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.59 + version: 0.0.60 alias: cortex-knowledge-controllers # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.59 + version: 0.0.60 alias: cortex-scheduling-controllers # Owner info adds a configmap to the kubernetes cluster with information on diff --git a/helm/bundles/cortex-crds/Chart.yaml b/helm/bundles/cortex-crds/Chart.yaml index 4e841e56f..6e329e03f 100644 --- a/helm/bundles/cortex-crds/Chart.yaml +++ b/helm/bundles/cortex-crds/Chart.yaml @@ -5,13 +5,13 @@ apiVersion: v2 name: cortex-crds description: A Helm chart deploying Cortex CRDs. type: application -version: 0.0.72 +version: 0.0.73 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.59 + version: 0.0.60 # Owner info adds a configmap to the kubernetes cluster with information on # the service owner. This makes it easier to find out who to contact in case diff --git a/helm/bundles/cortex-ironcore/Chart.yaml b/helm/bundles/cortex-ironcore/Chart.yaml index 5ffa84cbe..2857c7c40 100644 --- a/helm/bundles/cortex-ironcore/Chart.yaml +++ b/helm/bundles/cortex-ironcore/Chart.yaml @@ -5,13 +5,13 @@ apiVersion: v2 name: cortex-ironcore description: A Helm chart deploying Cortex for IronCore. type: application -version: 0.0.72 +version: 0.0.73 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.59 + version: 0.0.60 # Owner info adds a configmap to the kubernetes cluster with information on # the service owner. This makes it easier to find out who to contact in case diff --git a/helm/bundles/cortex-manila/Chart.yaml b/helm/bundles/cortex-manila/Chart.yaml index 3ad0a358e..42cdc2df0 100644 --- a/helm/bundles/cortex-manila/Chart.yaml +++ b/helm/bundles/cortex-manila/Chart.yaml @@ -5,7 +5,7 @@ apiVersion: v2 name: cortex-manila description: A Helm chart deploying Cortex for Manila. type: application -version: 0.0.72 +version: 0.0.73 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex-postgres @@ -16,12 +16,12 @@ dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.59 + version: 0.0.60 alias: cortex-knowledge-controllers # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.59 + version: 0.0.60 alias: cortex-scheduling-controllers # Owner info adds a configmap to the kubernetes cluster with information on diff --git a/helm/bundles/cortex-nova/Chart.yaml b/helm/bundles/cortex-nova/Chart.yaml index e5b8da70f..35989568d 100644 --- a/helm/bundles/cortex-nova/Chart.yaml +++ b/helm/bundles/cortex-nova/Chart.yaml @@ -5,7 +5,7 @@ apiVersion: v2 name: cortex-nova description: A Helm chart deploying Cortex for Nova. type: application -version: 0.0.72 +version: 0.0.73 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex-postgres @@ -16,12 +16,12 @@ dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.59 + version: 0.0.60 alias: cortex-knowledge-controllers # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.59 + version: 0.0.60 alias: cortex-scheduling-controllers # Owner info adds a configmap to the kubernetes cluster with information on diff --git a/helm/bundles/cortex-pods/Chart.yaml b/helm/bundles/cortex-pods/Chart.yaml index 822ca5942..661718a4a 100644 --- a/helm/bundles/cortex-pods/Chart.yaml +++ b/helm/bundles/cortex-pods/Chart.yaml @@ -5,13 +5,13 @@ apiVersion: v2 name: cortex-pods description: A Helm chart deploying Cortex for Pods. type: application -version: 0.0.72 +version: 0.0.73 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.59 + version: 0.0.60 # Owner info adds a configmap to the kubernetes cluster with information on # the service owner. This makes it easier to find out who to contact in case diff --git a/helm/library/cortex/Chart.yaml b/helm/library/cortex/Chart.yaml index 0d41ef42b..0fe64530a 100644 --- a/helm/library/cortex/Chart.yaml +++ b/helm/library/cortex/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: cortex description: A Helm chart to distribute cortex. type: application -version: 0.0.59 -appVersion: "sha-fefb9f83" +version: 0.0.60 +appVersion: "sha-12c6f24d" icon: "https://example.com/icon.png" dependencies: [] diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go index 6b2f3ff06..edad1f088 100644 --- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go @@ -12,6 +12,7 @@ import ( api "github.com/cobaltcore-dev/cortex/api/external/nova" "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + resv "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations" hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" "k8s.io/apimachinery/pkg/api/resource" ) @@ -198,7 +199,7 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa continue } - // For CR reservations with allocations, compute the effective block: + // CommittedResourceReservations: compute the effective block: // confirmed = sum of resources for VMs present in both Spec and Status allocations // specOnly = sum of resources for VMs present in Spec but not yet in Status // remaining = max(0, Spec.Resources - confirmed) [clamped: never negative] @@ -206,61 +207,9 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa // // Clamping: if confirmed VMs exceed slot size (e.g. after resize), block = 0. // Oversize spec-only: if a pending VM is larger than the remaining slot, block its full size. - var resourcesToBlock map[hv1.ResourceName]resource.Quantity - if reservation.Spec.Type == v1alpha1.ReservationTypeCommittedResource && - // When ignoring allocations (empty-datacenter scenario) VM resources are not - // deducted, so the confirmed-VM adjustment would under-block: always use the - // full slot instead. - !s.Options.IgnoreAllocations && - // if the reservation is not being migrated, block only unused resources - reservation.Spec.TargetHost == reservation.Status.Host && - reservation.Spec.CommittedResourceReservation != nil && - len(reservation.Spec.CommittedResourceReservation.Allocations) > 0 { - confirmedResources := make(map[hv1.ResourceName]resource.Quantity) - specOnlyResources := make(map[hv1.ResourceName]resource.Quantity) - - statusAllocs := map[string]string{} - if reservation.Status.CommittedResourceReservation != nil { - statusAllocs = reservation.Status.CommittedResourceReservation.Allocations - } - - for instanceUUID, allocation := range reservation.Spec.CommittedResourceReservation.Allocations { - _, isConfirmed := statusAllocs[instanceUUID] - for resourceName, quantity := range allocation.Resources { - if isConfirmed { - existing := confirmedResources[resourceName] - existing.Add(quantity) - confirmedResources[resourceName] = existing - } else { - existing := specOnlyResources[resourceName] - existing.Add(quantity) - specOnlyResources[resourceName] = existing - } - } - } - - resourcesToBlock = make(map[hv1.ResourceName]resource.Quantity) - zero := resource.Quantity{} - for resourceName, slotSize := range reservation.Spec.Resources { - confirmed := confirmedResources[resourceName] - specOnly := specOnlyResources[resourceName] - - remaining := slotSize.DeepCopy() - remaining.Sub(confirmed) - if remaining.Cmp(zero) < 0 { - remaining = zero.DeepCopy() - } - - if specOnly.Cmp(remaining) > 0 { - resourcesToBlock[resourceName] = specOnly.DeepCopy() - } else { - resourcesToBlock[resourceName] = remaining - } - } - } else { - // For other reservation types or CR without allocations, block full resources - resourcesToBlock = reservation.Spec.Resources - } + // + // FailoverReservations: block = Spec.Resources (always fully blocked). + resourcesToBlock := resv.UnusedReservationCapacity(&reservation, s.Options.IgnoreAllocations) // Block the calculated resources on each host for host := range hostsToBlock { diff --git a/internal/scheduling/reservations/capacity/controller.go b/internal/scheduling/reservations/capacity/controller.go index 8f7992ca1..f12a86a71 100644 --- a/internal/scheduling/reservations/capacity/controller.go +++ b/internal/scheduling/reservations/capacity/controller.go @@ -86,10 +86,17 @@ func (c *Controller) reconcileAll(ctx context.Context) error { azs := availabilityZones(hvList.Items) + // Compute reservation memory blocks once per cycle — shared across all (group × AZ) pairs. + blockedByReservations, err := c.blockedMemoryByHost(ctx) + if err != nil { + logger.Error(err, "failed to compute blocked memory by host, placeable slot counts may be overstated") + blockedByReservations = map[string]int64{} + } + var succeeded, failed int for groupName, groupData := range flavorGroups { for _, az := range azs { - if err := c.reconcileOne(ctx, groupName, groupData, az, hvByName, hvList.Items); err != nil { + if err := c.reconcileOne(ctx, groupName, groupData, az, hvByName, hvList.Items, blockedByReservations); err != nil { logger.Error(err, "failed to reconcile flavor group capacity", "flavorGroup", groupName, "az", az) failed++ @@ -118,6 +125,7 @@ func (c *Controller) reconcileOne( az string, hvByName map[string]hv1.Hypervisor, allHVs []hv1.Hypervisor, + blockedByReservations map[string]int64, ) error { smallestFlavorBytes := int64(groupData.SmallestFlavor.MemoryMB) * 1024 * 1024 //nolint:gosec @@ -162,8 +170,8 @@ func (c *Controller) reconcileOne( cur := existingByName[flavor.Name] cur.FlavorName = flavor.Name - totalVMSlots, totalHosts, totalErr := c.probeScheduler(ctx, flavor, az, c.config.TotalPipeline, hvByName, true) - placeableVMs, placeableHosts, placeableErr := c.probeScheduler(ctx, flavor, az, c.config.PlaceablePipeline, hvByName, false) + totalVMSlots, totalHosts, totalErr := c.probeScheduler(ctx, flavor, az, c.config.TotalPipeline, hvByName, true, nil) + placeableVMs, placeableHosts, placeableErr := c.probeScheduler(ctx, flavor, az, c.config.PlaceablePipeline, hvByName, false, blockedByReservations) if totalErr != nil { allFresh = false @@ -258,14 +266,15 @@ func (c *Controller) reconcileOne( // probeScheduler calls the scheduler with the given pipeline and returns VM slots + host count. // Capacity is computed as sum of floor(hostMemory / flavorMemory) across returned hosts. // When ignoreAllocations is true (total/empty-datacenter probe), raw effective capacity is used. -// When false (placeable probe), hv.Status.Allocation is subtracted first so that slots reflect -// remaining capacity after running VMs. +// When false (placeable probe), hv.Status.Allocation and blockedByReservations are subtracted so +// that slots reflect remaining capacity after running VMs and active reservation blocks. func (c *Controller) probeScheduler( ctx context.Context, flavor compute.FlavorInGroup, az, pipeline string, hvByName map[string]hv1.Hypervisor, ignoreAllocations bool, + blockedByReservations map[string]int64, ) (capacity, hosts int64, err error) { flavorBytes := int64(flavor.MemoryMB) * 1024 * 1024 //nolint:gosec @@ -318,6 +327,7 @@ func (c *Controller) probeScheduler( if alloc, ok := hv.Status.Allocation[hv1.ResourceMemory]; ok { capBytes -= alloc.Value() } + capBytes -= blockedByReservations[hostName] if capBytes < 0 { capBytes = 0 } @@ -329,6 +339,43 @@ func (c *Controller) probeScheduler( return capacity, hosts, nil } +// blockedMemoryByHost lists all Reservations and returns the total bytes blocked per host name. +// Only placed reservations (TargetHost or Status.Host non-empty) are counted. +// When a reservation is being migrated (TargetHost != Status.Host), both hosts are blocked. +func (c *Controller) blockedMemoryByHost(ctx context.Context) (map[string]int64, error) { + var list v1alpha1.ReservationList + if err := c.client.List(ctx, &list); err != nil { + return nil, fmt.Errorf("failed to list reservations: %w", err) + } + + blocked := make(map[string]int64) + for i := range list.Items { + res := &list.Items[i] + + hostsToBlock := make(map[string]struct{}) + if res.Spec.TargetHost != "" { + hostsToBlock[res.Spec.TargetHost] = struct{}{} + } + if res.Status.Host != "" { + hostsToBlock[res.Status.Host] = struct{}{} + } + if len(hostsToBlock) == 0 { + continue + } + + resourcesToBlock := reservations.UnusedReservationCapacity(res, false) + memQty, ok := resourcesToBlock[hv1.ResourceMemory] + if !ok { + continue + } + memBytes := memQty.Value() + for host := range hostsToBlock { + blocked[host] += memBytes + } + } + return blocked, nil +} + // sumCommittedCapacity sums AcceptedSpec.Amount (or Spec.Amount as fallback) across all // CommittedResource CRDs for the given (flavorGroup, az) pair with an active state // (guaranteed or confirmed) and resource type memory. Returns the total in slots. diff --git a/internal/scheduling/reservations/capacity/controller_test.go b/internal/scheduling/reservations/capacity/controller_test.go index 8938b8564..8e25ff644 100644 --- a/internal/scheduling/reservations/capacity/controller_test.go +++ b/internal/scheduling/reservations/capacity/controller_test.go @@ -226,7 +226,7 @@ func TestReconcileOne_CreatesCRD(t *testing.T) { } hvByName := map[string]hv1.Hypervisor{"host-1": *hv} - if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, hvByName, []hv1.Hypervisor{*hv}); err != nil { + if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, hvByName, []hv1.Hypervisor{*hv}, map[string]int64{}); err != nil { t.Fatalf("reconcileOne failed: %v", err) } @@ -293,7 +293,7 @@ func TestReconcileOne_SetsReadyConditionFalseOnSchedulerError(t *testing.T) { } // reconcileOne returns no error itself (it continues on probe failure), but sets Ready=False - if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, map[string]hv1.Hypervisor{}, []hv1.Hypervisor{}); err != nil { + if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, map[string]hv1.Hypervisor{}, []hv1.Hypervisor{}, map[string]int64{}); err != nil { t.Fatalf("reconcileOne failed: %v", err) } @@ -358,11 +358,11 @@ func TestReconcileOne_IdempotentUpdate(t *testing.T) { hvByName := map[string]hv1.Hypervisor{"host-1": *hv} // First call - if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, hvByName, []hv1.Hypervisor{*hv}); err != nil { + if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, hvByName, []hv1.Hypervisor{*hv}, map[string]int64{}); err != nil { t.Fatalf("first reconcileOne failed: %v", err) } // Second call — should not error on the already-existing CRD - if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, hvByName, []hv1.Hypervisor{*hv}); err != nil { + if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, hvByName, []hv1.Hypervisor{*hv}, map[string]int64{}); err != nil { t.Fatalf("second reconcileOne failed: %v", err) } @@ -429,7 +429,7 @@ func TestProbeScheduler_CapacityCalculation(t *testing.T) { } flavor := compute.FlavorInGroup{Name: "test-flavor", MemoryMB: memMB} - capacity, hosts, err := c.probeScheduler(context.Background(), flavor, "az-a", "test-pipeline", hvByName, true) + capacity, hosts, err := c.probeScheduler(context.Background(), flavor, "az-a", "test-pipeline", hvByName, true, nil) if err != nil { t.Fatalf("probeScheduler failed: %v", err) } @@ -467,7 +467,7 @@ func TestProbeScheduler_SubtractsAllocationsWhenNotIgnored(t *testing.T) { flavor := compute.FlavorInGroup{Name: "test-flavor", MemoryMB: memMB} // Total probe (ignoreAllocations=true): raw capacity → 2 slots. - totalCap, _, err := c.probeScheduler(context.Background(), flavor, "az-a", "total-pipeline", hvByName, true) + totalCap, _, err := c.probeScheduler(context.Background(), flavor, "az-a", "total-pipeline", hvByName, true, nil) if err != nil { t.Fatalf("probeScheduler (total) failed: %v", err) } @@ -476,7 +476,7 @@ func TestProbeScheduler_SubtractsAllocationsWhenNotIgnored(t *testing.T) { } // Placeable probe (ignoreAllocations=false): capacity − allocation → 1 slot. - placeableCap, _, err := c.probeScheduler(context.Background(), flavor, "az-a", "placeable-pipeline", hvByName, false) + placeableCap, _, err := c.probeScheduler(context.Background(), flavor, "az-a", "placeable-pipeline", hvByName, false, nil) if err != nil { t.Fatalf("probeScheduler (placeable) failed: %v", err) } @@ -576,7 +576,7 @@ func TestReconcileOne_ZeroMemoryFlavorReturnsError(t *testing.T) { groupData := compute.FlavorGroupFeature{ SmallestFlavor: compute.FlavorInGroup{Name: "bad-flavor", MemoryMB: 0}, } - err := c.reconcileOne(context.Background(), "hana-v2", groupData, "az-a", nil, nil) + err := c.reconcileOne(context.Background(), "hana-v2", groupData, "az-a", nil, nil, nil) if err == nil { t.Error("expected error for zero-memory flavor") } @@ -648,3 +648,47 @@ func TestSumCommittedCapacity(t *testing.T) { t.Errorf("sumCommittedCapacity = %d, want 3", got) } } + +// TestProbeScheduler_SubtractsReservationBlocksWhenNotIgnored verifies that placeable-probe +// slot counting subtracts per-host reservation blocks in addition to hv.Status.Allocation. +func TestProbeScheduler_SubtractsReservationBlocksWhenNotIgnored(t *testing.T) { + const memMB = 4096 + const memBytes = int64(memMB) * 1024 * 1024 + + scheme := newTestScheme(t) + + // Host has 3-slot capacity (3 × flavor), 1 slot used by running VM, 1 slot blocked by reservation. + hv := newHypervisor("host-1", "az-a", memBytes*3) + hv.Status.Allocation = map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceMemory: *resource.NewQuantity(memBytes, resource.BinarySI), + } + + fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() + srv := newMockSchedulerServer(t, []string{"host-1"}) + defer srv.Close() + + c := NewController(fakeClient, Config{SchedulerURL: srv.URL}) + hvByName := map[string]hv1.Hypervisor{"host-1": *hv} + flavor := compute.FlavorInGroup{Name: "test-flavor", MemoryMB: memMB} + + // Total probe: raw 3 slots, no subtraction. + totalCap, _, err := c.probeScheduler(context.Background(), flavor, "az-a", "total-pipeline", hvByName, true, nil) + if err != nil { + t.Fatalf("probeScheduler (total) failed: %v", err) + } + if totalCap != 3 { + t.Errorf("total capacity = %d, want 3", totalCap) + } + + // Placeable probe with 1 reservation block: 3 - 1 (alloc) - 1 (reservation) = 1 slot. + blockedByReservations := map[string]int64{ + "host-1": memBytes, // 1 reservation blocking 1 slot's worth of memory + } + placeableCap, _, err := c.probeScheduler(context.Background(), flavor, "az-a", "placeable-pipeline", hvByName, false, blockedByReservations) + if err != nil { + t.Fatalf("probeScheduler (placeable) failed: %v", err) + } + if placeableCap != 1 { + t.Errorf("placeable capacity = %d, want 1 (3 slots − 1 alloc − 1 reservation)", placeableCap) + } +} diff --git a/internal/scheduling/reservations/capacity_accounting.go b/internal/scheduling/reservations/capacity_accounting.go new file mode 100644 index 000000000..2ccca9685 --- /dev/null +++ b/internal/scheduling/reservations/capacity_accounting.go @@ -0,0 +1,75 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package reservations + +import ( + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "k8s.io/apimachinery/pkg/api/resource" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" +) + +// UnusedReservationCapacity returns the resources a Reservation should block on its host(s). +// This is the single source of truth used by both the capacity controller and +// filter_has_enough_capacity to ensure consistent accounting. +// +// CommittedResourceReservations: confirmed VMs already appear in hv.Status.Allocation, +// so blocking the full slot would double-count them. The effective block is: +// max(slot − confirmedVMs, specOnlyVMs), clamped to zero. Skipped (full slot used) when +// ignoreAllocations is true or when mid-migration (TargetHost != Status.Host). +// +// FailoverReservations: always block the full Spec.Resources. +func UnusedReservationCapacity(res *v1alpha1.Reservation, ignoreAllocations bool) map[hv1.ResourceName]resource.Quantity { + if res.Spec.Type == v1alpha1.ReservationTypeCommittedResource && + !ignoreAllocations && + res.Spec.TargetHost == res.Status.Host && + res.Spec.CommittedResourceReservation != nil && + len(res.Spec.CommittedResourceReservation.Allocations) > 0 { + confirmedResources := make(map[hv1.ResourceName]resource.Quantity) + specOnlyResources := make(map[hv1.ResourceName]resource.Quantity) + + statusAllocs := map[string]string{} + if res.Status.CommittedResourceReservation != nil { + statusAllocs = res.Status.CommittedResourceReservation.Allocations + } + + for instanceUUID, allocation := range res.Spec.CommittedResourceReservation.Allocations { + _, isConfirmed := statusAllocs[instanceUUID] + for resourceName, quantity := range allocation.Resources { + if isConfirmed { + existing := confirmedResources[resourceName] + existing.Add(quantity) + confirmedResources[resourceName] = existing + } else { + existing := specOnlyResources[resourceName] + existing.Add(quantity) + specOnlyResources[resourceName] = existing + } + } + } + + result := make(map[hv1.ResourceName]resource.Quantity) + zero := resource.Quantity{} + for resourceName, slotSize := range res.Spec.Resources { + confirmed := confirmedResources[resourceName] + specOnly := specOnlyResources[resourceName] + + remaining := slotSize.DeepCopy() + remaining.Sub(confirmed) + if remaining.Cmp(zero) < 0 { + remaining = zero.DeepCopy() + } + + if specOnly.Cmp(remaining) > 0 { + result[resourceName] = specOnly.DeepCopy() + } else { + result[resourceName] = remaining + } + } + return result + } else { + // FailoverReservations are always fully blocked and unused. + return res.Spec.Resources + } +} diff --git a/internal/scheduling/reservations/capacity_accounting_test.go b/internal/scheduling/reservations/capacity_accounting_test.go new file mode 100644 index 000000000..815a0a07f --- /dev/null +++ b/internal/scheduling/reservations/capacity_accounting_test.go @@ -0,0 +1,172 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package reservations + +import ( + "testing" + + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "k8s.io/apimachinery/pkg/api/resource" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" +) + +func TestUnusedReservationCapacity(t *testing.T) { + gib := func(n int64) resource.Quantity { return *resource.NewQuantity(n*1024*1024*1024, resource.BinarySI) } + memBytes := func(m map[hv1.ResourceName]resource.Quantity) int64 { + q, ok := m[hv1.ResourceMemory] + if !ok { + return 0 + } + return q.Value() + } + + tests := []struct { + name string + res *v1alpha1.Reservation + ignoreAllocations bool + wantMemoryBytes int64 + }{ + { + name: "failover: full slot blocked", + res: &v1alpha1.Reservation{ + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeFailover, + Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(480)}, + }, + }, + wantMemoryBytes: 480 * 1024 * 1024 * 1024, + }, + { + name: "CR no allocations: full slot blocked", + res: &v1alpha1.Reservation{ + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(480)}, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{}, + }, + }, + wantMemoryBytes: 480 * 1024 * 1024 * 1024, + }, + { + name: "CR 1 confirmed VM (240Gi), slot=480Gi: remaining = 240Gi blocked", + res: &v1alpha1.Reservation{ + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(480)}, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + Allocations: map[string]v1alpha1.CommittedResourceAllocation{ + "vm-1": {Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(240)}}, + }, + }, + }, + Status: v1alpha1.ReservationStatus{ + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationStatus{ + Allocations: map[string]string{"vm-1": "host-a"}, + }, + }, + }, + wantMemoryBytes: 240 * 1024 * 1024 * 1024, + }, + { + name: "CR slot fully consumed by confirmed VMs: block = 0", + res: &v1alpha1.Reservation{ + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(480)}, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + Allocations: map[string]v1alpha1.CommittedResourceAllocation{ + "vm-1": {Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(240)}}, + "vm-2": {Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(240)}}, + }, + }, + }, + Status: v1alpha1.ReservationStatus{ + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationStatus{ + Allocations: map[string]string{"vm-1": "host-a", "vm-2": "host-a"}, + }, + }, + }, + wantMemoryBytes: 0, + }, + { + name: "CR spec-only VM (240Gi), slot=480Gi, no confirmed: specOnly < remaining → full slot blocked", + res: &v1alpha1.Reservation{ + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(480)}, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + Allocations: map[string]v1alpha1.CommittedResourceAllocation{ + "vm-1": {Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(240)}}, + }, + }, + }, + // vm-1 not in status → spec-only + }, + wantMemoryBytes: 480 * 1024 * 1024 * 1024, + }, + { + name: "CR mid-migration (TargetHost != Status.Host): full slot blocked despite confirmed VMs", + res: &v1alpha1.Reservation{ + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + TargetHost: "new-host", + Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(480)}, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + Allocations: map[string]v1alpha1.CommittedResourceAllocation{ + "vm-1": {Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(240)}}, + }, + }, + }, + Status: v1alpha1.ReservationStatus{ + Host: "old-host", // differs from TargetHost → migration in progress + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationStatus{ + Allocations: map[string]string{"vm-1": "old-host"}, + }, + }, + }, + wantMemoryBytes: 480 * 1024 * 1024 * 1024, + }, + { + name: "CR ignoreAllocations=true: full slot blocked regardless of confirmed VMs", + res: &v1alpha1.Reservation{ + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(480)}, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + Allocations: map[string]v1alpha1.CommittedResourceAllocation{ + "vm-1": {Resources: map[hv1.ResourceName]resource.Quantity{hv1.ResourceMemory: gib(240)}}, + }, + }, + }, + Status: v1alpha1.ReservationStatus{ + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationStatus{ + Allocations: map[string]string{"vm-1": "host-a"}, + }, + }, + }, + ignoreAllocations: true, + wantMemoryBytes: 480 * 1024 * 1024 * 1024, + }, + { + name: "no memory resource: block = 0", + res: &v1alpha1.Reservation{ + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeFailover, + Resources: map[hv1.ResourceName]resource.Quantity{}, + }, + }, + wantMemoryBytes: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := memBytes(UnusedReservationCapacity(tt.res, tt.ignoreAllocations)) + if got != tt.wantMemoryBytes { + t.Errorf("UnusedReservationCapacity() memory = %d, want %d", got, tt.wantMemoryBytes) + } + }) + } +} diff --git a/internal/scheduling/reservations/quota/controller.go b/internal/scheduling/reservations/quota/controller.go index f808fbbec..186ae50b5 100644 --- a/internal/scheduling/reservations/quota/controller.go +++ b/internal/scheduling/reservations/quota/controller.go @@ -145,7 +145,7 @@ func (c *QuotaController) ReconcilePeriodic(ctx context.Context) error { paygUsage := derivePaygUsage(projectTotalUsage, crUsage) // Write status with conflict retry (full reconcile sets LastFullReconcileAt) - if err := c.updateProjectQuotaStatusWithRetry(ctx, pq.Name, projectTotalUsage, paygUsage, true); err != nil { + if err := c.updateProjectQuotaStatusWithRetry(ctx, pq.Name, projectTotalUsage, paygUsage, true, flavorGroups); err != nil { logger.Error(err, "failed to update ProjectQuota status", "project", projectID) skipped++ continue @@ -252,7 +252,7 @@ func (c *QuotaController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl paygUsage := derivePaygUsage(totalUsage, crUsage) // Write updated status with conflict retry - if err := c.updateProjectQuotaStatusWithRetry(ctx, pq.Name, totalUsage, paygUsage, specChanged); err != nil { + if err := c.updateProjectQuotaStatusWithRetry(ctx, pq.Name, totalUsage, paygUsage, specChanged, flavorGroups); err != nil { logger.Error(err, "failed to update ProjectQuota status") return ctrl.Result{}, err } @@ -634,8 +634,8 @@ func (c *QuotaController) applyDeltaAndUpdateStatus( // Update human-readable summaries for wide kubectl output pq.Status.TotalUsageSummary = buildUsageSummary(pq.Status.TotalUsage) pq.Status.PaygUsageSummary = buildUsageSummary(pq.Status.PaygUsage) - pq.Status.LimesUsageSummary = buildLimesSummary(pq.Status.TotalUsage, c.Config.FlavorGroupResourceConfig) - pq.Status.LimesQuotaSummary = buildLimesSummary(pq.Spec.Quota, c.Config.FlavorGroupResourceConfig) + pq.Status.LimesUsageSummary = buildLimesSummary(pq.Status.TotalUsage, flavorGroups) + pq.Status.LimesQuotaSummary = buildLimesSummary(pq.Spec.Quota, flavorGroups) now := metav1.Now() pq.Status.LastReconcileAt = &now @@ -939,13 +939,14 @@ func expandAZSlice(flat map[string]int64, az string) map[string]map[string]int64 // totalUsage and paygUsage are multi-AZ maps; this function extracts the relevant AZ // slice based on the CRD's Spec.AvailabilityZone. // If fullReconcile is true, also updates LastFullReconcileAt and ObservedGeneration. -// flavorGroups is used to compute Limes unit summaries (may be nil to skip). +// flavorGroups is used to compute Limes unit summaries. func (c *QuotaController) updateProjectQuotaStatusWithRetry( ctx context.Context, pqName string, totalUsage map[string]map[string]int64, paygUsage map[string]map[string]int64, fullReconcile bool, + flavorGroups map[string]compute.FlavorGroupFeature, ) error { return retry.RetryOnConflict(retry.DefaultRetry, func() error { @@ -972,8 +973,8 @@ func (c *QuotaController) updateProjectQuotaStatusWithRetry( pq.Status.TotalUsageSummary = buildUsageSummary(pq.Status.TotalUsage) pq.Status.PaygUsageSummary = buildUsageSummary(pq.Status.PaygUsage) // Limes unit summaries for debugging (converted from internal GiB to declared units) - pq.Status.LimesUsageSummary = buildLimesSummary(pq.Status.TotalUsage, c.Config.FlavorGroupResourceConfig) - pq.Status.LimesQuotaSummary = buildLimesSummary(pq.Spec.Quota, c.Config.FlavorGroupResourceConfig) + pq.Status.LimesUsageSummary = buildLimesSummary(pq.Status.TotalUsage, flavorGroups) + pq.Status.LimesQuotaSummary = buildLimesSummary(pq.Spec.Quota, flavorGroups) pq.Status.ObservedGeneration = pq.Generation now := metav1.Now() pq.Status.LastReconcileAt = &now @@ -1066,7 +1067,8 @@ func buildUsageSummary(usage map[string]int64) string { // For RAM resources in fixed-ratio groups: value * 1024 / SmallestFlavor.MemoryMB (GiB→slots). // For RAM resources in variable-ratio groups: value is already in GiB = declared units (1:1). // For cores and instances: value is 1:1 (no conversion). -func buildLimesSummary(values map[string]int64, cfg map[string]commitments.FlavorGroupResourcesConfig) string { +// Uses FlavorGroupFeature from the Knowledge CRD for RAM unit conversion (consistent with the usage API). +func buildLimesSummary(values map[string]int64, flavorGroups map[string]compute.FlavorGroupFeature) string { if len(values) == 0 { return "" } @@ -1076,7 +1078,11 @@ func buildLimesSummary(values map[string]int64, cfg map[string]commitments.Flavo if strings.HasPrefix(key, "hw_version_") && strings.HasSuffix(key, "_ram") { name := strings.TrimPrefix(key, "hw_version_") groupName := strings.TrimSuffix(name, "_ram") - converted[key] = commitments.ResourceConfigForGroup(cfg, groupName).RAM.GiBToDeclaredUnits(val) + if fg, ok := flavorGroups[groupName]; ok { + converted[key] = fg.GiBToDeclaredUnits(val) + } else { + converted[key] = val // no conversion if group unknown + } } else { converted[key] = val }