Skip to content

Commit be7b52d

Browse files
francoispqt and FxKu authored
add preferred during scheduling pod anti affinity (#2048)
* add preferred during scheduling pod anti affinity

Co-authored-by: Felix Kunde <felix-kunde@gmx.de>
1 parent 93a253b commit be7b52d

File tree

10 files changed

+215
-89
lines changed

10 files changed

+215
-89
lines changed

charts/postgres-operator/crds/operatorconfigurations.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,9 @@ spec:
281281
pod_antiaffinity_topology_key:
282282
type: string
283283
default: "kubernetes.io/hostname"
284+
pod_antiaffinity_preferred_during_scheduling:
285+
type: boolean
286+
default: false
284287
pod_environment_configmap:
285288
type: string
286289
pod_environment_secret:

charts/postgres-operator/values.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ configKubernetes:
167167
pdb_name_format: "postgres-{cluster}-pdb"
168168
# override topology key for pod anti affinity
169169
pod_antiaffinity_topology_key: "kubernetes.io/hostname"
170+
# switches pod anti affinity type to `preferredDuringSchedulingIgnoredDuringExecution`
171+
# pod_antiaffinity_preferred_during_scheduling: true
170172
# namespaced name of the ConfigMap with environment variables to populate on every pod
171173
# pod_environment_configmap: "default/my-custom-config"
172174
# name of the Secret (in cluster namespace) with environment variables to populate on every pod

docs/administrator.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ CRDs set `enable_crd_registration` config option to `false`.
1515

1616
CRDs are defined with an `openAPIV3Schema` structural schema against which new
1717
manifests of [`postgresql`](https://github.com/zalando/postgres-operator/blob/master/manifests/postgresql.crd.yaml) or [`OperatorConfiguration`](https://github.com/zalando/postgres-operator/blob/master/manifests/operatorconfiguration.crd.yaml)
18-
resources will be validated. On creation you can bypass the validation with
18+
resources will be validated. On creation you can bypass the validation with
1919
`kubectl create --validate=false`.
2020

2121
By default, the operator will register the CRDs in the `all` category so
@@ -516,6 +516,9 @@ configuration:
516516
enable_pod_antiaffinity: true
517517
```
518518

519+
By default, the type of pod anti affinity is `requiredDuringSchedulingIgnoredDuringExecution`.
520+
You can switch to `preferredDuringSchedulingIgnoredDuringExecution` by setting `pod_antiaffinity_preferred_during_scheduling: true`.
521+
519522
By default the topology key for the pod anti affinity is set to
520523
`kubernetes.io/hostname`, you can set another topology key e.g.
521524
`failure-domain.beta.kubernetes.io/zone`. See [built-in node labels](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels) for available topology keys.
@@ -1091,7 +1094,7 @@ data:
10911094
USE_WALG_BACKUP: "true"
10921095
USE_WALG_RESTORE: "true"
10931096
CLONE_USE_WALG_RESTORE: "true"
1094-
WALG_AZ_PREFIX: "azure://container-name/$(SCOPE)/$(PGVERSION)" # Enables Azure Backups (SCOPE = Cluster name) (PGVERSION = Postgres version)
1097+
WALG_AZ_PREFIX: "azure://container-name/$(SCOPE)/$(PGVERSION)" # Enables Azure Backups (SCOPE = Cluster name) (PGVERSION = Postgres version)
10951098
```
10961099

10971100
3. Setup your operator configuration values. With the `psql-backup-creds`

pkg/apis/acid.zalan.do/v1/crds.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1378,6 +1378,9 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{
13781378
"pod_antiaffinity_topology_key": {
13791379
Type: "string",
13801380
},
1381+
"pod_antiaffinity_preferred_during_scheduling": {
1382+
Type: "boolean",
1383+
},
13811384
"pod_environment_configmap": {
13821385
Type: "string",
13831386
},

pkg/apis/acid.zalan.do/v1/operator_configuration_type.go

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,17 @@ type KubernetesMetaConfiguration struct {
9191
NodeReadinessLabelMerge string `json:"node_readiness_label_merge,omitempty"`
9292
CustomPodAnnotations map[string]string `json:"custom_pod_annotations,omitempty"`
9393
// TODO: use a proper toleration structure?
94-
PodToleration map[string]string `json:"toleration,omitempty"`
95-
PodEnvironmentConfigMap spec.NamespacedName `json:"pod_environment_configmap,omitempty"`
96-
PodEnvironmentSecret string `json:"pod_environment_secret,omitempty"`
97-
PodPriorityClassName string `json:"pod_priority_class_name,omitempty"`
98-
MasterPodMoveTimeout Duration `json:"master_pod_move_timeout,omitempty"`
99-
EnablePodAntiAffinity bool `json:"enable_pod_antiaffinity,omitempty"`
100-
PodAntiAffinityTopologyKey string `json:"pod_antiaffinity_topology_key,omitempty"`
101-
PodManagementPolicy string `json:"pod_management_policy,omitempty"`
102-
EnableReadinessProbe bool `json:"enable_readiness_probe,omitempty"`
103-
EnableCrossNamespaceSecret bool `json:"enable_cross_namespace_secret,omitempty"`
94+
PodToleration map[string]string `json:"toleration,omitempty"`
95+
PodEnvironmentConfigMap spec.NamespacedName `json:"pod_environment_configmap,omitempty"`
96+
PodEnvironmentSecret string `json:"pod_environment_secret,omitempty"`
97+
PodPriorityClassName string `json:"pod_priority_class_name,omitempty"`
98+
MasterPodMoveTimeout Duration `json:"master_pod_move_timeout,omitempty"`
99+
EnablePodAntiAffinity bool `json:"enable_pod_antiaffinity,omitempty"`
100+
PodAntiAffinityTopologyKey string `json:"pod_antiaffinity_topology_key,omitempty"`
101+
PodAntiAffinityPreferredDuringScheduling bool `json:"pod_antiaffinity_preferred_during_scheduling,omitempty"`
102+
PodManagementPolicy string `json:"pod_management_policy,omitempty"`
103+
EnableReadinessProbe bool `json:"enable_readiness_probe,omitempty"`
104+
EnableCrossNamespaceSecret bool `json:"enable_cross_namespace_secret,omitempty"`
104105
}
105106

106107
// PostgresPodResourcesDefaults defines the spec of default resources

pkg/cluster/connection_pooler.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,12 @@ func (c *Cluster) generateConnectionPoolerPodTemplate(role PostgresRole) (
354354
nodeAffinity := c.nodeAffinity(c.OpConfig.NodeReadinessLabel, spec.NodeAffinity)
355355
if c.OpConfig.EnablePodAntiAffinity {
356356
labelsSet := labels.Set(c.connectionPoolerLabels(role, false).MatchLabels)
357-
podTemplate.Spec.Affinity = generatePodAffinity(labelsSet, c.OpConfig.PodAntiAffinityTopologyKey, nodeAffinity)
357+
podTemplate.Spec.Affinity = generatePodAffinity(
358+
labelsSet,
359+
c.OpConfig.PodAntiAffinityTopologyKey,
360+
nodeAffinity,
361+
c.OpConfig.PodAntiAffinityPreferredDuringScheduling,
362+
)
358363
} else if nodeAffinity != nil {
359364
podTemplate.Spec.Affinity = nodeAffinity
360365
}

pkg/cluster/k8sres.go

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -495,17 +495,27 @@ func (c *Cluster) nodeAffinity(nodeReadinessLabel map[string]string, nodeAffinit
495495
}
496496
}
497497

498-
func generatePodAffinity(labels labels.Set, topologyKey string, nodeAffinity *v1.Affinity) *v1.Affinity {
498+
func generatePodAffinity(labels labels.Set, topologyKey string, nodeAffinity *v1.Affinity, preferredDuringScheduling bool) *v1.Affinity {
499499
// generate pod anti-affinity to avoid multiple pods of the same Postgres cluster in the same topology, e.g. node
500-
podAffinity := v1.Affinity{
501-
PodAntiAffinity: &v1.PodAntiAffinity{
502-
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{{
503-
LabelSelector: &metav1.LabelSelector{
504-
MatchLabels: labels,
505-
},
506-
TopologyKey: topologyKey,
507-
}},
500+
501+
podAffinityTerm := v1.PodAffinityTerm{
502+
LabelSelector: &metav1.LabelSelector{
503+
MatchLabels: labels,
508504
},
505+
TopologyKey: topologyKey,
506+
}
507+
508+
podAffinity := v1.Affinity{
509+
PodAntiAffinity: &v1.PodAntiAffinity{},
510+
}
511+
512+
if preferredDuringScheduling {
513+
podAffinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution = []v1.WeightedPodAffinityTerm{{
514+
Weight: 1,
515+
PodAffinityTerm: podAffinityTerm,
516+
}}
517+
} else {
518+
podAffinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution = []v1.PodAffinityTerm{podAffinityTerm}
509519
}
510520

511521
if nodeAffinity != nil && nodeAffinity.NodeAffinity != nil {
@@ -727,6 +737,7 @@ func (c *Cluster) generatePodTemplate(
727737
shmVolume *bool,
728738
podAntiAffinity bool,
729739
podAntiAffinityTopologyKey string,
740+
podAntiAffinityPreferredDuringScheduling bool,
730741
additionalSecretMount string,
731742
additionalSecretMountPath string,
732743
additionalVolumes []acidv1.AdditionalVolume,
@@ -767,7 +778,12 @@ func (c *Cluster) generatePodTemplate(
767778
}
768779

769780
if podAntiAffinity {
770-
podSpec.Affinity = generatePodAffinity(labels, podAntiAffinityTopologyKey, nodeAffinity)
781+
podSpec.Affinity = generatePodAffinity(
782+
labels,
783+
podAntiAffinityTopologyKey,
784+
nodeAffinity,
785+
podAntiAffinityPreferredDuringScheduling,
786+
)
771787
} else if nodeAffinity != nil {
772788
podSpec.Affinity = nodeAffinity
773789
}
@@ -1376,6 +1392,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef
13761392
mountShmVolumeNeeded(c.OpConfig, spec),
13771393
c.OpConfig.EnablePodAntiAffinity,
13781394
c.OpConfig.PodAntiAffinityTopologyKey,
1395+
c.OpConfig.PodAntiAffinityPreferredDuringScheduling,
13791396
c.OpConfig.AdditionalSecretMount,
13801397
c.OpConfig.AdditionalSecretMountPath,
13811398
additionalVolumes)
@@ -2122,6 +2139,7 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) {
21222139
util.False(),
21232140
false,
21242141
"",
2142+
false,
21252143
c.OpConfig.AdditionalSecretMount,
21262144
c.OpConfig.AdditionalSecretMountPath,
21272145
[]acidv1.AdditionalVolume{}); err != nil {

pkg/cluster/k8sres_test.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,6 +1360,95 @@ func TestNodeAffinity(t *testing.T) {
13601360
assert.Equal(t, s.Spec.Template.Spec.Affinity.NodeAffinity, nodeAff, "cluster template has correct node affinity")
13611361
}
13621362

1363+
func TestPodAntiAffinityrRequiredDuringScheduling(t *testing.T) {
1364+
var err error
1365+
var spiloRunAsUser = int64(101)
1366+
var spiloRunAsGroup = int64(103)
1367+
var spiloFSGroup = int64(103)
1368+
1369+
spec := acidv1.PostgresSpec{
1370+
TeamID: "myapp", NumberOfInstances: 1,
1371+
Resources: &acidv1.Resources{
1372+
ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "10"},
1373+
ResourceLimits: acidv1.ResourceDescription{CPU: "1", Memory: "10"},
1374+
},
1375+
Volume: acidv1.Volume{
1376+
Size: "1G",
1377+
},
1378+
}
1379+
1380+
cluster := New(
1381+
Config{
1382+
OpConfig: config.Config{
1383+
PodManagementPolicy: "ordered_ready",
1384+
ProtectedRoles: []string{"admin"},
1385+
Auth: config.Auth{
1386+
SuperUsername: superUserName,
1387+
ReplicationUsername: replicationUserName,
1388+
},
1389+
Resources: config.Resources{
1390+
SpiloRunAsUser: &spiloRunAsUser,
1391+
SpiloRunAsGroup: &spiloRunAsGroup,
1392+
SpiloFSGroup: &spiloFSGroup,
1393+
},
1394+
EnablePodAntiAffinity: true,
1395+
},
1396+
}, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder)
1397+
1398+
s, err := cluster.generateStatefulSet(&spec)
1399+
if err != nil {
1400+
assert.NoError(t, err)
1401+
}
1402+
1403+
assert.Nil(t, s.Spec.Template.Spec.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution, "pod anti-affinity should not use preferredDuringScheduling")
1404+
assert.NotNil(t, s.Spec.Template.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution, "pod anti-affinity should use requiredDuringScheduling")
1405+
}
1406+
1407+
func TestPodAntiAffinityPreferredDuringScheduling(t *testing.T) {
1408+
var err error
1409+
var spiloRunAsUser = int64(101)
1410+
var spiloRunAsGroup = int64(103)
1411+
var spiloFSGroup = int64(103)
1412+
1413+
spec := acidv1.PostgresSpec{
1414+
TeamID: "myapp", NumberOfInstances: 1,
1415+
Resources: &acidv1.Resources{
1416+
ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "10"},
1417+
ResourceLimits: acidv1.ResourceDescription{CPU: "1", Memory: "10"},
1418+
},
1419+
Volume: acidv1.Volume{
1420+
Size: "1G",
1421+
},
1422+
}
1423+
1424+
cluster := New(
1425+
Config{
1426+
OpConfig: config.Config{
1427+
PodManagementPolicy: "ordered_ready",
1428+
ProtectedRoles: []string{"admin"},
1429+
Auth: config.Auth{
1430+
SuperUsername: superUserName,
1431+
ReplicationUsername: replicationUserName,
1432+
},
1433+
Resources: config.Resources{
1434+
SpiloRunAsUser: &spiloRunAsUser,
1435+
SpiloRunAsGroup: &spiloRunAsGroup,
1436+
SpiloFSGroup: &spiloFSGroup,
1437+
},
1438+
EnablePodAntiAffinity: true,
1439+
PodAntiAffinityPreferredDuringScheduling: true,
1440+
},
1441+
}, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder)
1442+
1443+
s, err := cluster.generateStatefulSet(&spec)
1444+
if err != nil {
1445+
assert.NoError(t, err)
1446+
}
1447+
1448+
assert.NotNil(t, s.Spec.Template.Spec.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution, "pod anti-affinity should use preferredDuringScheduling")
1449+
assert.Nil(t, s.Spec.Template.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution, "pod anti-affinity should not use requiredDuringScheduling")
1450+
}
1451+
13631452
func testDeploymentOwnerReference(cluster *Cluster, deployment *appsv1.Deployment) error {
13641453
owner := deployment.ObjectMeta.OwnerReferences[0]
13651454

pkg/controller/operator_config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
123123
result.MasterPodMoveTimeout = util.CoalesceDuration(time.Duration(fromCRD.Kubernetes.MasterPodMoveTimeout), "10m")
124124
result.EnablePodAntiAffinity = fromCRD.Kubernetes.EnablePodAntiAffinity
125125
result.PodAntiAffinityTopologyKey = util.Coalesce(fromCRD.Kubernetes.PodAntiAffinityTopologyKey, "kubernetes.io/hostname")
126+
result.PodAntiAffinityPreferredDuringScheduling = fromCRD.Kubernetes.PodAntiAffinityPreferredDuringScheduling
126127
result.PodToleration = fromCRD.Kubernetes.PodToleration
127128

128129
// Postgres Pod resources

0 commit comments

Comments
 (0)