Skip to content

Commit ce3c922

Browse files
committed
Remove tablespaces stuff from snapshots stuff and add early exit if both features are enabled. Add string length validation to volume snapshot class name. If multiple snapshots with errors exist, delete all but the one with the latest error. Add/adjust tests.
1 parent 556f7a4 commit ce3c922

File tree

9 files changed

+132
-179
lines changed

9 files changed

+132
-179
lines changed

config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4330,6 +4330,7 @@ spec:
43304330
volumeSnapshotClassName:
43314331
description: Name of the VolumeSnapshotClass that should be
43324332
used by VolumeSnapshots
4333+
minLength: 1
43334334
type: string
43344335
required:
43354336
- volumeSnapshotClassName

config/rbac/cluster/role.yaml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,6 @@ rules:
163163
- list
164164
- patch
165165
- watch
166-
- apiGroups:
167-
- snapshot.storage.k8s.io
168-
resources:
169-
- volumesnapshotclasses
170-
verbs:
171-
- get
172-
- list
173-
- watch
174166
- apiGroups:
175167
- snapshot.storage.k8s.io
176168
resources:

config/rbac/namespace/role.yaml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,6 @@ rules:
163163
- list
164164
- patch
165165
- watch
166-
- apiGroups:
167-
- snapshot.storage.k8s.io
168-
resources:
169-
- volumesnapshotclasses
170-
verbs:
171-
- get
172-
- list
173-
- watch
174166
- apiGroups:
175167
- snapshot.storage.k8s.io
176168
resources:

internal/controller/postgrescluster/controller.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,7 @@ func (r *Reconciler) Reconcile(
365365
}
366366
}
367367
if err == nil {
368-
dedicatedSnapshotPVC, err = r.reconcileDedicatedSnapshotVolume(ctx, cluster, instances,
369-
clusterVolumes)
368+
dedicatedSnapshotPVC, err = r.reconcileDedicatedSnapshotVolume(ctx, cluster, clusterVolumes)
370369
}
371370
if err == nil {
372371
err = r.reconcileVolumeSnapshots(ctx, cluster, dedicatedSnapshotPVC)

internal/controller/postgrescluster/snapshots.go

Lines changed: 57 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import (
2828
)
2929

3030
// +kubebuilder:rbac:groups="snapshot.storage.k8s.io",resources="volumesnapshots",verbs={get,list,create,patch,delete}
31-
// +kubebuilder:rbac:groups="snapshot.storage.k8s.io",resources="volumesnapshotclasses",verbs={get,list}
3231

3332
// reconcileVolumeSnapshots creates and manages VolumeSnapshots if the proper VolumeSnapshot CRDs
3433
// are installed and VolumeSnapshots are enabled for the PostgresCluster. A VolumeSnapshot of the
@@ -52,10 +51,6 @@ func (r *Reconciler) reconcileVolumeSnapshots(ctx context.Context,
5251
// Get feature gate state
5352
volumeSnapshotsFeatureEnabled := feature.Enabled(ctx, feature.VolumeSnapshots)
5453

55-
// TODO(snapshots): If user has a snapshots section in their cluster spec, indicating
56-
// that they would like to use snapshots, but the VolumeSnapshots feature gate is not
57-
// turned on, should we emit an event or log something?
58-
5954
// Check if the Kube cluster has VolumeSnapshots installed. If VolumeSnapshots
6055
// are not installed, we need to return early. If user is attempting to use
6156
// VolumeSnapshots, return an error, otherwise return nil.
@@ -71,6 +66,16 @@ func (r *Reconciler) reconcileVolumeSnapshots(ctx context.Context,
7166
}
7267
}
7368

69+
// If user is attempting to use snapshots and has tablespaces enabled, we
70+
// need to create a warning event indicating that the two features are not
71+
// currently compatible and return early.
72+
if postgrescluster.Spec.Backups.Snapshots != nil && volumeSnapshotsFeatureEnabled &&
73+
clusterUsingTablespaces(ctx, postgrescluster) {
74+
r.Recorder.Event(postgrescluster, corev1.EventTypeWarning, "IncompatibleFeatures",
75+
"VolumeSnapshots not currently compatible with TablespaceVolumes; cannot create snapshot.")
76+
return nil
77+
}
78+
7479
// Get all snapshots for the cluster.
7580
snapshots, err := r.getSnapshotsForCluster(ctx, postgrescluster)
7681
if err != nil {
@@ -85,24 +90,22 @@ func (r *Reconciler) reconcileVolumeSnapshots(ctx context.Context,
8590
// If we got here, then the snapshots are enabled (feature gate is enabled and the
8691
// cluster has a Spec.Backups.Snapshots section defined).
8792

88-
// The specified VolumeSnapshotClass needs to exist.
89-
volumeSnapshotClass := &volumesnapshotv1.VolumeSnapshotClass{
90-
ObjectMeta: metav1.ObjectMeta{
91-
Name: postgrescluster.Spec.Backups.Snapshots.VolumeSnapshotClassName,
92-
},
93-
}
94-
err = errors.WithStack(r.Client.Get(ctx, client.ObjectKeyFromObject(volumeSnapshotClass),
95-
volumeSnapshotClass))
96-
if err != nil {
97-
return err
98-
}
99-
100-
// Check snapshots for errors; if present, create an event. If there
101-
// are multiple snapshots with errors, create event for the latest error.
102-
latestSnapshotWithError := getLatestSnapshotWithError(snapshots)
103-
if latestSnapshotWithError != nil {
93+
// Check snapshots for errors; if present, create an event. If there are
94+
// multiple snapshots with errors, create event for the latest error and
95+
// delete any older snapshots with error.
96+
snapshotWithLatestError := getSnapshotWithLatestError(snapshots)
97+
if snapshotWithLatestError != nil {
10498
r.Recorder.Event(postgrescluster, corev1.EventTypeWarning, "VolumeSnapshotError",
105-
*latestSnapshotWithError.Status.Error.Message)
99+
*snapshotWithLatestError.Status.Error.Message)
100+
for _, snapshot := range snapshots.Items {
101+
if snapshot.Status.Error != nil &&
102+
snapshot.Status.Error.Time.Before(snapshotWithLatestError.Status.Error.Time) {
103+
err = r.deleteControlled(ctx, postgrescluster, &snapshot)
104+
if err != nil {
105+
return err
106+
}
107+
}
108+
}
106109
}
107110

108111
// Get pvc backup job completion annotation. If it does not exist, there has not been
@@ -125,7 +128,8 @@ func (r *Reconciler) reconcileVolumeSnapshots(ctx context.Context,
125128

126129
// If a snapshot exists for the latest backup that has been restored into the dedicated pvc
127130
// and the snapshot is Ready, delete all other snapshots.
128-
if snapshotFoundForPvcUpdate && *snapshots.Items[snapshotForPvcUpdateIdx].Status.ReadyToUse {
131+
if snapshotFoundForPvcUpdate && snapshots.Items[snapshotForPvcUpdateIdx].Status.ReadyToUse != nil &&
132+
*snapshots.Items[snapshotForPvcUpdateIdx].Status.ReadyToUse {
129133
for idx, snapshot := range snapshots.Items {
130134
if idx != snapshotForPvcUpdateIdx {
131135
err = r.deleteControlled(ctx, postgrescluster, &snapshot)
@@ -157,7 +161,6 @@ func (r *Reconciler) reconcileVolumeSnapshots(ctx context.Context,
157161
// after a successful backup.
158162
func (r *Reconciler) reconcileDedicatedSnapshotVolume(
159163
ctx context.Context, cluster *v1beta1.PostgresCluster,
160-
instances *observedInstances,
161164
clusterVolumes []corev1.PersistentVolumeClaim,
162165
) (*corev1.PersistentVolumeClaim, error) {
163166

@@ -219,19 +222,11 @@ func (r *Reconciler) reconcileDedicatedSnapshotVolume(
219222
return pvc, nil
220223
}
221224

222-
// If the pvc has not been annotated, it has not been restored and we want to proceed
223-
// with a restore.
224-
pvcAnnotationTimestampString, pvcAnnotationExists := pvc.GetAnnotations()[naming.PGBackRestBackupJobCompletion]
225-
if pvcAnnotationExists {
226-
// If pvc has been annotated, we want to compare its timestamp to the latest backup job's
227-
// timestamp. If the pvc's timestamp is older than the latest backup job's
228-
// completion time, then we want to do a restore.
229-
backupJobCompletionTime := backupJob.Status.CompletionTime.Time
230-
pvcAnnotationTime, err := time.Parse(time.RFC3339, pvcAnnotationTimestampString)
231-
if err != nil {
232-
return pvc, err
233-
}
234-
if backupJobCompletionTime.Compare(pvcAnnotationTime) <= 0 {
225+
// Return early if the pvc is annotated with a timestamp newer or equal to the latest backup job.
226+
// If the annotation value cannot be parsed, we want to proceed with a restore.
227+
pvcAnnotationTimestampString := pvc.GetAnnotations()[naming.PGBackRestBackupJobCompletion]
228+
if pvcAnnotationTime, err := time.Parse(time.RFC3339, pvcAnnotationTimestampString); err == nil {
229+
if backupJob.Status.CompletionTime.Compare(pvcAnnotationTime) <= 0 {
235230
return pvc, nil
236231
}
237232
}
@@ -250,7 +245,7 @@ func (r *Reconciler) reconcileDedicatedSnapshotVolume(
250245

251246
// If we don't find a restore job, we run one.
252247
if restoreJob == nil {
253-
err = r.dedicatedSnapshotVolumeRestore(ctx, cluster, pvc, instances, clusterVolumes, backupJob)
248+
err = r.dedicatedSnapshotVolumeRestore(ctx, cluster, pvc, backupJob)
254249
return pvc, err
255250
}
256251

@@ -339,7 +334,6 @@ func (r *Reconciler) createDedicatedSnapshotVolume(ctx context.Context,
339334
// dedicated snapshot volume.
340335
func (r *Reconciler) dedicatedSnapshotVolumeRestore(ctx context.Context,
341336
cluster *v1beta1.PostgresCluster, dedicatedSnapshotVolume *corev1.PersistentVolumeClaim,
342-
instances *observedInstances, clusterVolumes []corev1.PersistentVolumeClaim,
343337
backupJob *batchv1.Job,
344338
) error {
345339

@@ -351,25 +345,9 @@ func (r *Reconciler) dedicatedSnapshotVolumeRestore(ctx context.Context,
351345
"--pg1-path=" + pgdata,
352346
"--repo=" + regexRepoIndex.FindString(repoName),
353347
"--delta",
354-
// "--link-all",
355348
}
356349

357-
// NOTE: It seems like our current architecture is forcing us to care about the instanceSet/instance
358-
// and consistently use the same one as the user *could* have multiple instanceSets with different
359-
// storage settings. Do we want to simply always choose the first instanceSet and instance in their
360-
// respective arrays? Or do we want to add something to the API for the user to specify an instanceSet?
361-
// For now, I am choosing the first instanceSet/instance in their arrays.
362-
instanceSet := &cluster.Spec.InstanceSets[0]
363-
instance := instances.bySet[instanceSet.Name][0].Runner
364-
365-
// How are we going to handle tablespace volumes? Feels like if they exist we need to provide them to
366-
// the restore command otherwise they will break in any clones made from a snapshot.
367-
pgtablespaces, err := r.reconcileTablespaceVolumes(ctx, cluster, instanceSet, instance, clusterVolumes)
368-
if err != nil {
369-
return errors.WithStack(err)
370-
}
371-
372-
cmd := pgbackrest.DedicatedSnapshotVolumeRestoreCommand(pgdata, pgtablespaces, strings.Join(opts, " "))
350+
cmd := pgbackrest.DedicatedSnapshotVolumeRestoreCommand(pgdata, strings.Join(opts, " "))
373351

374352
// Create the volume resources required for the Postgres data directory.
375353
dataVolumeMount := postgres.DataVolumeMount()
@@ -539,28 +517,30 @@ func (r *Reconciler) getLatestCompleteBackupJob(ctx context.Context,
539517
return &latestCompleteBackupJob, nil
540518
}
541519

542-
// getLatestSnapshotWithError takes a VolumeSnapshotList and returns a pointer to the
543-
// most recently created snapshot that has an error. If no snapshot errors exist
520+
// getSnapshotWithLatestError takes a VolumeSnapshotList and returns a pointer to the
521+
// snapshot that has most recently had an error. If no snapshot errors exist
544522
// then it returns nil.
545-
func getLatestSnapshotWithError(snapshots *volumesnapshotv1.VolumeSnapshotList) *volumesnapshotv1.VolumeSnapshot {
523+
func getSnapshotWithLatestError(snapshots *volumesnapshotv1.VolumeSnapshotList) *volumesnapshotv1.VolumeSnapshot {
546524
zeroTime := metav1.NewTime(time.Time{})
547-
latestSnapshotWithError := volumesnapshotv1.VolumeSnapshot{
525+
snapshotWithLatestError := volumesnapshotv1.VolumeSnapshot{
548526
Status: &volumesnapshotv1.VolumeSnapshotStatus{
549-
CreationTime: &zeroTime,
527+
Error: &volumesnapshotv1.VolumeSnapshotError{
528+
Time: &zeroTime,
529+
},
550530
},
551531
}
552532
for _, snapshot := range snapshots.Items {
553533
if snapshot.Status.Error != nil &&
554-
latestSnapshotWithError.Status.CreationTime.Before(snapshot.Status.CreationTime) {
555-
latestSnapshotWithError = snapshot
534+
snapshotWithLatestError.Status.Error.Time.Before(snapshot.Status.Error.Time) {
535+
snapshotWithLatestError = snapshot
556536
}
557537
}
558538

559-
if latestSnapshotWithError.Status.CreationTime.Equal(&zeroTime) {
539+
if snapshotWithLatestError.Status.Error.Time.Equal(&zeroTime) {
560540
return nil
561541
}
562542

563-
return &latestSnapshotWithError
543+
return &snapshotWithLatestError
564544
}
565545

566546
// getSnapshotsForCluster gets all the VolumeSnapshots for a given postgrescluster.
@@ -590,7 +570,7 @@ func getLatestReadySnapshot(snapshots *volumesnapshotv1.VolumeSnapshotList) *vol
590570
},
591571
}
592572
for _, snapshot := range snapshots.Items {
593-
if *snapshot.Status.ReadyToUse &&
573+
if snapshot.Status.ReadyToUse != nil && *snapshot.Status.ReadyToUse &&
594574
latestReadySnapshot.Status.CreationTime.Before(snapshot.Status.CreationTime) {
595575
latestReadySnapshot = snapshot
596576
}
@@ -617,3 +597,14 @@ func (r *Reconciler) deleteSnapshots(ctx context.Context,
617597
}
618598
return nil
619599
}
600+
601+
// tablespaceVolumesInUse determines if the TablespaceVolumes feature is enabled and the given
602+
// cluster has tablespace volumes in place.
603+
func clusterUsingTablespaces(ctx context.Context, postgrescluster *v1beta1.PostgresCluster) bool {
604+
for _, instanceSet := range postgrescluster.Spec.InstanceSets {
605+
if instanceSet.TablespaceVolumes != nil && len(instanceSet.TablespaceVolumes) > 0 {
606+
return feature.Enabled(ctx, feature.TablespaceVolumes)
607+
}
608+
}
609+
return false
610+
}

0 commit comments

Comments
 (0)