Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,435 changes: 1,726 additions & 1,709 deletions CATALOG.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions expected_results.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ testCases:
- lifecycle-pod-toleration-bypass
- lifecycle-readiness-probe
- lifecycle-startup-probe
- lifecycle-topology-spread-constraint
- manageability-container-port-name-format
- manageability-containers-image-tag
- networking-dual-stack-service
Expand Down
1 change: 1 addition & 0 deletions tests/identifiers/doclinks.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ const (
TestStatefulSetScalingIdentifierDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-high-level-cnf-expectations"
TestImagePullPolicyIdentifierDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-use-imagepullpolicy:-ifnotpresent"
TestPodRecreationIdentifierDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-upgrade-expectations"
TestTopologySpreadConstraintDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-high-level-cnf-expectations"
TestLivenessProbeIdentifierDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-liveness-readiness-and-startup-probes"
TestReadinessProbeIdentifierDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-liveness-readiness-and-startup-probes"
TestStartupProbeIdentifierDocLink = "https://redhat-best-practices-for-k8s.github.io/guide/#k8s-best-practices-liveness-readiness-and-startup-probes"
Expand Down
17 changes: 17 additions & 0 deletions tests/identifiers/identifiers.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ var (
TestStatefulSetScalingIdentifier claim.Identifier
TestImagePullPolicyIdentifier claim.Identifier
TestPodRecreationIdentifier claim.Identifier
TestTopologySpreadConstraint claim.Identifier
TestPodRoleBindingsBestPracticesIdentifier claim.Identifier
TestPodServiceAccountBestPracticesIdentifier claim.Identifier
TestPodAutomountServiceAccountIdentifier claim.Identifier
Expand Down Expand Up @@ -1229,6 +1230,22 @@ that Node's kernel may not have the same hacks.'`,
},
TagCommon)

TestTopologySpreadConstraint = AddCatalogEntry(
"topology-spread-constraint",
common.LifecycleTestKey,
`Ensures that Deployments using TopologySpreadConstraints include constraints for both hostname and zone topology keys. This helps telco workloads avoid needing to tweak PodDisruptionBudgets before platform upgrades. If TopologySpreadConstraints is not defined, the test passes as Kubernetes scheduler implicitly uses hostname and zone constraints.`+NotApplicableSNO, //nolint:lll
TopologySpreadConstraintRemediation,
NoDocumentedProcess,
TestTopologySpreadConstraintDocLink,
true,
map[string]string{
FarEdge: Optional,
Telco: Mandatory,
NonTelco: Optional,
Extended: Optional,
},
TagTelco)

TestPodRoleBindingsBestPracticesIdentifier = AddCatalogEntry(
"pod-role-bindings",
common.AccessControlTestKey,
Expand Down
2 changes: 2 additions & 0 deletions tests/identifiers/impact.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ const (
TestStatefulSetScalingIdentifierImpact = `StatefulSet scaling issues can prevent proper data persistence and ordered deployment of stateful applications.`
TestImagePullPolicyIdentifierImpact = `Incorrect image pull policies can cause deployment failures when image registries are unavailable or during network issues.`
TestPodRecreationIdentifierImpact = `Failed pod recreation indicates poor high availability configuration, leading to potential service outages during node failures.`
TestTopologySpreadConstraintImpact = `Without proper topology spread constraints, pods may cluster on nodes causing PodDisruptionBudgets to block platform upgrades, requiring manual PDB adjustments and increasing operational complexity during maintenance windows.`
TestLivenessProbeIdentifierImpact = `Missing liveness probes prevent Kubernetes from detecting and recovering from application deadlocks and hangs.`
TestReadinessProbeIdentifierImpact = `Missing readiness probes can cause traffic to be routed to non-ready pods, resulting in failed requests and poor user experience.`
TestStartupProbeIdentifierImpact = `Missing startup probes can cause slow-starting applications to be killed prematurely, preventing successful application startup.`
Expand Down Expand Up @@ -243,6 +244,7 @@ var ImpactMap = map[string]string{
"lifecycle-statefulset-scaling": TestStatefulSetScalingIdentifierImpact,
"lifecycle-image-pull-policy": TestImagePullPolicyIdentifierImpact,
"lifecycle-pod-recreation": TestPodRecreationIdentifierImpact,
"lifecycle-topology-spread-constraint": TestTopologySpreadConstraintImpact,
"lifecycle-liveness-probe": TestLivenessProbeIdentifierImpact,
"lifecycle-readiness-probe": TestReadinessProbeIdentifierImpact,
"lifecycle-startup-probe": TestStartupProbeIdentifierImpact,
Expand Down
2 changes: 2 additions & 0 deletions tests/identifiers/remediation.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ const (

PodRecreationRemediation = `Ensure that the workloads Pods utilize a configuration that supports High Availability. Additionally, ensure that there are available Nodes in the OpenShift cluster that can be utilized in the event that a host Node fails.`

TopologySpreadConstraintRemediation = `If using TopologySpreadConstraints in your Deployment, ensure you include constraints for both 'kubernetes.io/hostname' and 'topology.kubernetes.io/zone' topology keys. Alternatively, you can omit TopologySpreadConstraints entirely to let Kubernetes scheduler use implicit hostname and zone constraints. This helps maintain workload availability during platform upgrades without manually adjusting PodDisruptionBudgets.`

SysctlConfigsRemediation = `You should recreate the node or change the sysctls, recreating is recommended because there might be other unknown changes`

ServiceMeshRemediation = `Ensure all the workload pods are using service mesh if the cluster provides it.`
Expand Down
80 changes: 80 additions & 0 deletions tests/lifecycle/suite.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package lifecycle

import (
"fmt"
"time"

"github.com/redhat-best-practices-for-k8s/certsuite/internal/log"
Expand Down Expand Up @@ -234,6 +235,16 @@ func LoadChecks() {
testStorageProvisioner(c, &env)
return nil
}))

// Topology Spread Constraint test
checksGroup.Add(checksdb.NewCheck(identifiers.GetTestIDAndLabels(identifiers.TestTopologySpreadConstraint)).
WithSkipCheckFn(
testhelper.GetNoDeploymentsUnderTestSkipFn(&env),
testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle)).
WithCheckFn(func(c *checksdb.Check) error {
testTopologySpreadConstraint(c, &env)
return nil
}))
}

func testContainersPreStop(check *checksdb.Check, env *provider.TestEnvironment) {
Expand Down Expand Up @@ -883,3 +894,72 @@ func testStorageProvisioner(check *checksdb.Check, env *provider.TestEnvironment
}
check.SetResult(compliantObjects, nonCompliantObjects)
}

// Topology keys that testTopologySpreadConstraint requires whenever a
// Deployment defines TopologySpreadConstraints.
const (
// hostnameTopologyKey is compared against each constraint's TopologyKey to
// detect a hostname-level spread constraint.
hostnameTopologyKey = "kubernetes.io/hostname"
// zoneTopologyKey is compared against each constraint's TopologyKey to
// detect a zone-level spread constraint.
zoneTopologyKey = "topology.kubernetes.io/zone"
)

// testTopologySpreadConstraint inspects the TopologySpreadConstraints of every
// Deployment in env.Deployments and records one report object per Deployment.
//
// A Deployment is reported as compliant when its pod template defines no
// TopologySpreadConstraints, or when the defined constraints cover both
// hostnameTopologyKey and zoneTopologyKey. A Deployment that defines
// constraints but lacks either key is reported as non-compliant, and the
// absent keys are attached to its report object under "MissingTopologyKeys".
// The two collections are finally handed to check.SetResult.
func testTopologySpreadConstraint(check *checksdb.Check, env *provider.TestEnvironment) {
	var passed, failed []*testhelper.ReportObject

	// NOTE(review): the Deployment value itself is passed to %q in the log
	// calls below, so the rendered text depends on its String() output —
	// confirm that this is the intended log form.
	for _, deployment := range env.Deployments {
		check.LogInfo("Testing Deployment %q", deployment)

		// The constraints live on the pod template, not on the Deployment itself.
		constraints := deployment.Spec.Template.Spec.TopologySpreadConstraints

		// Nothing defined: accepted as-is and reported as compliant.
		if len(constraints) == 0 {
			check.LogInfo("Deployment %q does not define TopologySpreadConstraints (implicit scheduling is acceptable)", deployment)
			report := testhelper.NewDeploymentReportObject(deployment.Namespace, deployment.Name,
				"TopologySpreadConstraints not defined (implicit Kubernetes scheduling behavior)", true)
			passed = append(passed, report)
			continue
		}

		check.LogInfo("Deployment %q defines TopologySpreadConstraints, checking for required topology keys", deployment)

		// Scan every constraint and note which of the two required keys show up.
		var hostnameSeen, zoneSeen bool
		for i := range constraints {
			switch constraints[i].TopologyKey {
			case hostnameTopologyKey:
				hostnameSeen = true
				check.LogInfo("Deployment %q has hostname topology key: %s", deployment, hostnameTopologyKey)
			case zoneTopologyKey:
				zoneSeen = true
				check.LogInfo("Deployment %q has zone topology key: %s", deployment, zoneTopologyKey)
			}
		}

		// Both keys present: compliant.
		if hostnameSeen && zoneSeen {
			check.LogInfo("Deployment %q has both required topology keys (hostname and zone)", deployment)
			report := testhelper.NewDeploymentReportObject(deployment.Namespace, deployment.Name,
				"TopologySpreadConstraints includes both hostname and zone topology keys", true)
			passed = append(passed, report)
			continue
		}

		// At least one key is absent; list them hostname first, then zone.
		var missing []string
		if !hostnameSeen {
			missing = append(missing, hostnameTopologyKey)
		}
		if !zoneSeen {
			missing = append(missing, zoneTopologyKey)
		}

		check.LogError("Deployment %q TopologySpreadConstraints is missing required topology keys: %v", deployment, missing)
		report := testhelper.NewDeploymentReportObject(deployment.Namespace, deployment.Name,
			"TopologySpreadConstraints must include both hostname and zone topology keys when defined", false)
		failed = append(failed, report.AddField("MissingTopologyKeys", fmt.Sprintf("%v", missing)))
	}

	check.SetResult(passed, failed)
}
Loading