From 4c63a275a112b29cb7e99d58bcc6c88ec34e728c Mon Sep 17 00:00:00 2001 From: Todd Short Date: Thu, 23 Apr 2026 15:40:42 -0400 Subject: [PATCH] UPSTREAM: <carry>: OCPBUGS-62517: Set replicas=1, PDB, and pod anti-affinity for HA topology Rolling updates in HighlyAvailable clusters leave catalogd and operator-controller unavailable when the only running pod is evicted before its replacement is ready. Fix by defaulting replicas=1 and PDB disabled in the static Helm values (safe for SNO/External topologies, passes the SNO conformance test that asserts exactly one replica in SingleReplica topology mode). Add pod anti-affinity to prefer scheduling replicas on different nodes. cluster-olm-operator detects the cluster's ControlPlaneTopology at startup and overrides these values to replicas=2 and PDB enabled when a HighlyAvailable topology is detected, then re-renders the manifests before starting controllers. When a topology change is observed at runtime (exceedingly rare), the operator exits so its deployment controller restarts it, triggering a fresh Helm render with the correct values for the new topology.
Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Todd Short --- openshift/catalogd/manifests-experimental.yaml | 8 ++++++++ openshift/catalogd/manifests.yaml | 8 ++++++++ openshift/helm/catalogd.yaml | 10 ++++++++++ openshift/helm/operator-controller.yaml | 10 ++++++++++ .../operator-controller/manifests-experimental.yaml | 8 ++++++++ openshift/operator-controller/manifests.yaml | 8 ++++++++ 6 files changed, 52 insertions(+) diff --git a/openshift/catalogd/manifests-experimental.yaml b/openshift/catalogd/manifests-experimental.yaml index 50ec4aba9c..4ab282db20 100644 --- a/openshift/catalogd/manifests-experimental.yaml +++ b/openshift/catalogd/manifests-experimental.yaml @@ -957,6 +957,14 @@ spec: operator: In values: - linux + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchLabels: + control-plane: catalogd-controller-manager + topologyKey: kubernetes.io/hostname + weight: 100 nodeSelector: kubernetes.io/os: linux node-role.kubernetes.io/control-plane: "" diff --git a/openshift/catalogd/manifests.yaml b/openshift/catalogd/manifests.yaml index 5b8f16cc73..242ddd757f 100644 --- a/openshift/catalogd/manifests.yaml +++ b/openshift/catalogd/manifests.yaml @@ -956,6 +956,14 @@ spec: operator: In values: - linux + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchLabels: + control-plane: catalogd-controller-manager + topologyKey: kubernetes.io/hostname + weight: 100 nodeSelector: kubernetes.io/os: linux node-role.kubernetes.io/control-plane: "" diff --git a/openshift/helm/catalogd.yaml b/openshift/helm/catalogd.yaml index 87b47361ff..0c2954300e 100644 --- a/openshift/helm/catalogd.yaml +++ b/openshift/helm/catalogd.yaml @@ -8,6 +8,7 @@ options: enabled: true deployment: image: ${CATALOGD_IMAGE} + replicas: 1 podDisruptionBudget: enabled: false operatorController: @@ -25,6 +26,15 @@ namespaces: # Deployment values for catalogd deployments: 
templateSpec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: kubernetes.io/hostname + labelSelector: + matchLabels: + control-plane: catalogd-controller-manager priorityClassName: system-cluster-critical securityContext: seLinuxOptions: diff --git a/openshift/helm/operator-controller.yaml b/openshift/helm/operator-controller.yaml index ee6276a252..b9ea4ae498 100644 --- a/openshift/helm/operator-controller.yaml +++ b/openshift/helm/operator-controller.yaml @@ -8,6 +8,7 @@ options: enabled: true deployment: image: ${OPERATOR_CONTROLLER_IMAGE} + replicas: 1 podDisruptionBudget: enabled: false catalogd: @@ -26,6 +27,15 @@ namespaces: # Deployment values for operator-controller deployments: templateSpec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: kubernetes.io/hostname + labelSelector: + matchLabels: + control-plane: operator-controller-controller-manager priorityClassName: system-cluster-critical securityContext: seLinuxOptions: diff --git a/openshift/operator-controller/manifests-experimental.yaml b/openshift/operator-controller/manifests-experimental.yaml index 1a97864aa4..91b6150336 100644 --- a/openshift/operator-controller/manifests-experimental.yaml +++ b/openshift/operator-controller/manifests-experimental.yaml @@ -1345,6 +1345,14 @@ spec: operator: In values: - linux + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchLabels: + control-plane: operator-controller-controller-manager + topologyKey: kubernetes.io/hostname + weight: 100 nodeSelector: kubernetes.io/os: linux node-role.kubernetes.io/control-plane: "" diff --git a/openshift/operator-controller/manifests.yaml b/openshift/operator-controller/manifests.yaml index f8d8b1fed4..b0d173f2fe 100644 --- a/openshift/operator-controller/manifests.yaml +++ 
b/openshift/operator-controller/manifests.yaml @@ -1188,6 +1188,14 @@ spec: operator: In values: - linux + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchLabels: + control-plane: operator-controller-controller-manager + topologyKey: kubernetes.io/hostname + weight: 100 nodeSelector: kubernetes.io/os: linux node-role.kubernetes.io/control-plane: ""