
Commit ca143fe

feat: Add lifecycle-topology-spread-constraint test
Implements a new test to ensure Deployments using TopologySpreadConstraints include both hostname and zone topology keys, to avoid manual PDB tweaking during platform upgrades.

Test Details:
- Pass: TSC not defined OR both hostname+zone keys present
- Fail: TSC defined but missing either key
- Skip: SNO and compact clusters (not enough workers)
- Classification: Telco (Mandatory), Others (Optional)
- Tags: telco, lifecycle

Files Modified:
- tests/lifecycle/suite.go: Test implementation
- tests/identifiers/identifiers.go: Test identifier
- tests/identifiers/impact.go: Impact statement
- tests/identifiers/remediation.go: Remediation guidance
- tests/identifiers/doclinks.go: Documentation link
- expected_results.yaml: Added to skip list
- CATALOG.md: Test catalog entry (auto-generated)
1 parent 14d9d21 commit ca143fe
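
For reference, a minimal Deployment that would satisfy the new check might declare both required topology keys as sketched below. This manifest is illustrative only and is not part of this commit; the sample-app name, labels, and image are hypothetical placeholders.

apiVersion: apps/v1
kind: Deployment
metadata:
  name: sample-app                 # hypothetical workload name
spec:
  replicas: 3
  selector:
    matchLabels:
      app: sample-app
  template:
    metadata:
      labels:
        app: sample-app
    spec:
      # Both keys the lifecycle-topology-spread-constraint test looks for:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname        # spread across worker nodes
          whenUnsatisfiable: DoNotSchedule
          labelSelector:
            matchLabels:
              app: sample-app
        - maxSkew: 1
          topologyKey: topology.kubernetes.io/zone   # spread across zones
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: sample-app
      containers:
        - name: sample-app
          image: registry.example.com/sample-app:1.0 # placeholder image

Note that the check only inspects the topologyKey values; fields such as maxSkew and whenUnsatisfiable are shown with common settings here and are not evaluated by the test.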

File tree

7 files changed: +1829 -1709 lines changed


CATALOG.md

Lines changed: 1726 additions & 1709 deletions
Large diffs are not rendered by default.

expected_results.yaml

Lines changed: 1 addition & 0 deletions
@@ -80,6 +80,7 @@ testCases:
   - lifecycle-cpu-isolation
   - lifecycle-statefulset-scaling
   - lifecycle-storage-provisioner
+  - lifecycle-topology-spread-constraint
   - networking-icmpv6-connectivity
   - networking-restart-on-reboot-sriov-pod
   - networking-network-attachment-definition-sriov-mtu

tests/identifiers/doclinks.go

Lines changed: 1 addition & 0 deletions
@@ -85,6 +85,7 @@ const (
     TestStatefulSetScalingIdentifierDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-high-level-cnf-expectations"
     TestImagePullPolicyIdentifierDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-use-imagepullpolicy:-ifnotpresent"
     TestPodRecreationIdentifierDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-upgrade-expectations"
+    TestTopologySpreadConstraintDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-high-level-cnf-expectations"
     TestLivenessProbeIdentifierDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-liveness-readiness-and-startup-probes"
     TestReadinessProbeIdentifierDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-liveness-readiness-and-startup-probes"
     TestStartupProbeIdentifierDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-liveness-readiness-and-startup-probes"

tests/identifiers/identifiers.go

Lines changed: 17 additions & 0 deletions
@@ -143,6 +143,7 @@ var (
     TestStatefulSetScalingIdentifier claim.Identifier
     TestImagePullPolicyIdentifier claim.Identifier
     TestPodRecreationIdentifier claim.Identifier
+    TestTopologySpreadConstraint claim.Identifier
     TestPodRoleBindingsBestPracticesIdentifier claim.Identifier
     TestPodServiceAccountBestPracticesIdentifier claim.Identifier
     TestPodAutomountServiceAccountIdentifier claim.Identifier
@@ -1229,6 +1230,22 @@ that Node's kernel may not have the same hacks.'`,
         },
         TagCommon)
 
+    TestTopologySpreadConstraint = AddCatalogEntry(
+        "topology-spread-constraint",
+        common.LifecycleTestKey,
+        `Ensures that Deployments using TopologySpreadConstraints include constraints for both hostname and zone topology keys. This helps telco workloads avoid needing to tweak PodDisruptionBudgets before platform upgrades. If TopologySpreadConstraints is not defined, the test passes as Kubernetes scheduler implicitly uses hostname and zone constraints.`+NotApplicableSNO, //nolint:lll
+        TopologySpreadConstraintRemediation,
+        NoDocumentedProcess,
+        TestTopologySpreadConstraintDocLink,
+        true,
+        map[string]string{
+            FarEdge:  Optional,
+            Telco:    Mandatory,
+            NonTelco: Optional,
+            Extended: Optional,
+        },
+        TagTelco)
+
     TestPodRoleBindingsBestPracticesIdentifier = AddCatalogEntry(
         "pod-role-bindings",
         common.AccessControlTestKey,

tests/identifiers/impact.go

Lines changed: 2 additions & 0 deletions
@@ -101,6 +101,7 @@ const (
     TestStatefulSetScalingIdentifierImpact = `StatefulSet scaling issues can prevent proper data persistence and ordered deployment of stateful applications.`
     TestImagePullPolicyIdentifierImpact = `Incorrect image pull policies can cause deployment failures when image registries are unavailable or during network issues.`
     TestPodRecreationIdentifierImpact = `Failed pod recreation indicates poor high availability configuration, leading to potential service outages during node failures.`
+    TestTopologySpreadConstraintImpact = `Without proper topology spread constraints, pods may cluster on nodes causing PodDisruptionBudgets to block platform upgrades, requiring manual PDB adjustments and increasing operational complexity during maintenance windows.`
     TestLivenessProbeIdentifierImpact = `Missing liveness probes prevent Kubernetes from detecting and recovering from application deadlocks and hangs.`
     TestReadinessProbeIdentifierImpact = `Missing readiness probes can cause traffic to be routed to non-ready pods, resulting in failed requests and poor user experience.`
     TestStartupProbeIdentifierImpact = `Missing startup probes can cause slow-starting applications to be killed prematurely, preventing successful application startup.`
@@ -243,6 +244,7 @@ var ImpactMap = map[string]string{
     "lifecycle-statefulset-scaling": TestStatefulSetScalingIdentifierImpact,
     "lifecycle-image-pull-policy": TestImagePullPolicyIdentifierImpact,
     "lifecycle-pod-recreation": TestPodRecreationIdentifierImpact,
+    "lifecycle-topology-spread-constraint": TestTopologySpreadConstraintImpact,
     "lifecycle-liveness-probe": TestLivenessProbeIdentifierImpact,
     "lifecycle-readiness-probe": TestReadinessProbeIdentifierImpact,
     "lifecycle-startup-probe": TestStartupProbeIdentifierImpact,

tests/identifiers/remediation.go

Lines changed: 2 additions & 0 deletions
@@ -127,6 +127,8 @@ const (
 
     PodRecreationRemediation = `Ensure that the workloads Pods utilize a configuration that supports High Availability. Additionally, ensure that there are available Nodes in the OpenShift cluster that can be utilized in the event that a host Node fails.`
 
+    TopologySpreadConstraintRemediation = `If using TopologySpreadConstraints in your Deployment, ensure you include constraints for both 'kubernetes.io/hostname' and 'topology.kubernetes.io/zone' topology keys. Alternatively, you can omit TopologySpreadConstraints entirely to let Kubernetes scheduler use implicit hostname and zone constraints. This helps maintain workload availability during platform upgrades without manually adjusting PodDisruptionBudgets.`
+
     SysctlConfigsRemediation = `You should recreate the node or change the sysctls, recreating is recommended because there might be other unknown changes`
 
     ServiceMeshRemediation = `Ensure all the workload pods are using service mesh if the cluster provides it.`

tests/lifecycle/suite.go

Lines changed: 80 additions & 0 deletions
@@ -17,6 +17,7 @@
 package lifecycle
 
 import (
+    "fmt"
     "time"
 
     "github.com/redhat-best-practices-for-k8s/certsuite/internal/log"
@@ -234,6 +235,16 @@ func LoadChecks() {
             testStorageProvisioner(c, &env)
             return nil
         }))
+
+    // Topology Spread Constraint test
+    checksGroup.Add(checksdb.NewCheck(identifiers.GetTestIDAndLabels(identifiers.TestTopologySpreadConstraint)).
+        WithSkipCheckFn(
+            testhelper.GetNoDeploymentsUnderTestSkipFn(&env),
+            testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle)).
+        WithCheckFn(func(c *checksdb.Check) error {
+            testTopologySpreadConstraint(c, &env)
+            return nil
+        }))
 }
 
 func testContainersPreStop(check *checksdb.Check, env *provider.TestEnvironment) {
@@ -883,3 +894,72 @@ func testStorageProvisioner(check *checksdb.Check, env *provider.TestEnvironment
     }
     check.SetResult(compliantObjects, nonCompliantObjects)
 }
+
+const (
+    hostnameTopologyKey = "kubernetes.io/hostname"
+    zoneTopologyKey     = "topology.kubernetes.io/zone"
+)
+
+func testTopologySpreadConstraint(check *checksdb.Check, env *provider.TestEnvironment) {
+    var compliantObjects []*testhelper.ReportObject
+    var nonCompliantObjects []*testhelper.ReportObject
+
+    for _, deployment := range env.Deployments {
+        check.LogInfo("Testing Deployment %q", deployment)
+
+        // Get the topology spread constraints from the pod template
+        tsc := deployment.Spec.Template.Spec.TopologySpreadConstraints
+
+        // Case 1: No TSC defined - PASS (implicit k8s behavior)
+        if len(tsc) == 0 {
+            check.LogInfo("Deployment %q does not define TopologySpreadConstraints (implicit scheduling is acceptable)", deployment)
+            compliantObjects = append(compliantObjects,
+                testhelper.NewDeploymentReportObject(deployment.Namespace, deployment.Name,
+                    "TopologySpreadConstraints not defined (implicit Kubernetes scheduling behavior)", true))
+            continue
+        }
+
+        // Case 2: TSC is defined - must include both hostname and zone
+        check.LogInfo("Deployment %q defines TopologySpreadConstraints, checking for required topology keys", deployment)
+
+        hasHostname := false
+        hasZone := false
+
+        // Check if both required topology keys are present
+        for _, constraint := range tsc {
+            if constraint.TopologyKey == hostnameTopologyKey {
+                hasHostname = true
+                check.LogInfo("Deployment %q has hostname topology key: %s", deployment, hostnameTopologyKey)
+            }
+            if constraint.TopologyKey == zoneTopologyKey {
+                hasZone = true
+                check.LogInfo("Deployment %q has zone topology key: %s", deployment, zoneTopologyKey)
+            }
+        }
+
+        // Both hostname and zone must be present
+        if hasHostname && hasZone {
+            check.LogInfo("Deployment %q has both required topology keys (hostname and zone)", deployment)
+            compliantObjects = append(compliantObjects,
+                testhelper.NewDeploymentReportObject(deployment.Namespace, deployment.Name,
+                    "TopologySpreadConstraints includes both hostname and zone topology keys", true))
+        } else {
+            // Missing one or both required keys
+            missingKeys := []string{}
+            if !hasHostname {
+                missingKeys = append(missingKeys, hostnameTopologyKey)
+            }
+            if !hasZone {
+                missingKeys = append(missingKeys, zoneTopologyKey)
+            }
+
+            check.LogError("Deployment %q TopologySpreadConstraints is missing required topology keys: %v", deployment, missingKeys)
+            nonCompliantObjects = append(nonCompliantObjects,
+                testhelper.NewDeploymentReportObject(deployment.Namespace, deployment.Name,
+                    "TopologySpreadConstraints must include both hostname and zone topology keys when defined", false).
+                    AddField("MissingTopologyKeys", fmt.Sprintf("%v", missingKeys)))
+        }
+    }
+
+    check.SetResult(compliantObjects, nonCompliantObjects)
+}
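
Conversely, a pod template that defines TopologySpreadConstraints but covers only the hostname key, as in the hypothetical fragment below, would be reported as non-compliant by testTopologySpreadConstraint, with topology.kubernetes.io/zone listed in the MissingTopologyKeys field of the report object.

# Pod template fragment (illustrative only; not part of this commit).
# Fails the new check: TopologySpreadConstraints is defined but the
# topology.kubernetes.io/zone key is missing.
topologySpreadConstraints:
  - maxSkew: 1
    topologyKey: kubernetes.io/hostname
    whenUnsatisfiable: DoNotSchedule
    labelSelector:
      matchLabels:
        app: sample-app   # hypothetical label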
