Skip to content

Commit 2051ffa

Browse files
committed
address comments
Signed-off-by: Britania Rodriguez Reyes <britaniar@microsoft.com>
1 parent 513cd83 commit 2051ffa

12 files changed

+166
-349
lines changed

apis/placement/v1beta1/stageupdate_types.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -335,9 +335,10 @@ type UpdateRunStatus struct {
335335
// +kubebuilder:validation:Optional
336336
PolicyObservedClusterCount int `json:"policyObservedClusterCount,omitempty"`
337337

338-
// ResourceSnapshotName records the name of the master resource snapshot that the update run is based on.
339-
// +kubebuilder:validation:Optional
340-
ResourceSnapshotName string `json:"resourceSnapshotName,omitempty"`
338+
// ResourceSnapshotIndexUsed records the resource snapshot index that the update run is based on.
339+
// The index represents the same resource snapshots as specified in the spec field, or the latest.
340+
// +kubbebuilder:validation:Optional
341+
ResourceSnapshotIndexUsed string `json:"resourceSnapshotIndexUsed,omitempty"`
341342

342343
// ApplyStrategy is the apply strategy that the stagedUpdateRun is using.
343344
// It is the same as the apply strategy in the CRP when the staged update run starts.

config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdateruns.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1855,9 +1855,10 @@ spec:
18551855
All clusters involved in the update run are selected from the list of clusters scheduled by the CRP according
18561856
to the current policy.
18571857
type: string
1858-
resourceSnapshotName:
1859-
description: ResourceSnapshotName records the name of the master resource
1860-
snapshot that the update run is based on.
1858+
resourceSnapshotIndexUsed:
1859+
description: |-
1860+
ResourceSnapshotIndexUsed records the resource snapshot index that the update run is based on.
1861+
The index represents the same resource snapshots as specified in the spec field, or the latest.
18611862
type: string
18621863
stagedUpdateStrategySnapshot:
18631864
description: |-

config/crd/bases/placement.kubernetes-fleet.io_stagedupdateruns.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -774,9 +774,10 @@ spec:
774774
All clusters involved in the update run are selected from the list of clusters scheduled by the CRP according
775775
to the current policy.
776776
type: string
777-
resourceSnapshotName:
778-
description: ResourceSnapshotName records the name of the master resource
779-
snapshot that the update run is based on.
777+
resourceSnapshotIndexUsed:
778+
description: |-
779+
ResourceSnapshotIndexUsed records the resource snapshot index that the update run is based on.
780+
The index represents the same resource snapshots as specified in the spec field, or the latest.
780781
type: string
781782
stagedUpdateStrategySnapshot:
782783
description: |-

pkg/controllers/updaterun/execution.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"errors"
2222
"fmt"
2323
"reflect"
24+
"strconv"
2425
"time"
2526

2627
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -92,8 +93,11 @@ func (r *Reconciler) executeUpdatingStage(
9293
toBeUpdatedBindings []placementv1beta1.BindingObj,
9394
) (time.Duration, error) {
9495
updateRunStatus := updateRun.GetUpdateRunStatus()
96+
updateRunSpec := updateRun.GetUpdateRunSpec()
9597
updatingStageStatus := &updateRunStatus.StagesStatus[updatingStageIndex]
96-
resourceSnapshotName := updateRunStatus.ResourceSnapshotName
98+
// The parse error is ignored because the initialization should have caught it.
99+
resourceIndex, _ := strconv.Atoi(updateRunStatus.ResourceSnapshotIndexUsed)
100+
resourceSnapshotName := fmt.Sprintf(placementv1beta1.ResourceSnapshotNameFmt, updateRunSpec.PlacementName, resourceIndex)
97101
updateRunRef := klog.KObj(updateRun)
98102
// Create the map of the toBeUpdatedBindings.
99103
toBeUpdatedBindingsMap := make(map[string]placementv1beta1.BindingObj, len(toBeUpdatedBindings))

pkg/controllers/updaterun/execution_integration_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() {
160160
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())
161161

162162
By("Validating the initialization succeeded and the execution started")
163-
initialized := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride)
163+
initialized := generateSucceededInitializationStatus(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy, clusterResourceOverride)
164164
wantStatus = generateExecutionStartedStatus(updateRun, initialized)
165165
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "")
166166

@@ -521,7 +521,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() {
521521
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())
522522

523523
By("Validating the initialization succeeded and the execution started")
524-
initialized := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride)
524+
initialized := generateSucceededInitializationStatus(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy, clusterResourceOverride)
525525
wantStatus = generateExecutionStartedStatus(updateRun, initialized)
526526
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "")
527527

@@ -680,7 +680,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() {
680680
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())
681681

682682
By("Validating the initialization succeeded and the execution started")
683-
initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, policySnapshot, updateStrategy)
683+
initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy)
684684
wantStatus = generateExecutionStartedStatus(updateRun, initialized)
685685
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "")
686686

@@ -774,7 +774,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() {
774774
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())
775775

776776
By("Validating the initialization succeeded and the execution started")
777-
initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, policySnapshot, updateStrategy)
777+
initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy)
778778
wantStatus = generateExecutionStartedStatus(updateRun, initialized)
779779
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "")
780780

@@ -883,7 +883,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() {
883883
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())
884884

885885
By("Validating the initialization succeeded and the execution started")
886-
initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, policySnapshot, updateStrategy)
886+
initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy)
887887
wantStatus = generateExecutionStartedStatus(updateRun, initialized)
888888
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "")
889889

@@ -1014,7 +1014,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() {
10141014
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())
10151015

10161016
By("Validating the initialization succeeded and the execution started")
1017-
initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, policySnapshot, updateStrategy)
1017+
initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy)
10181018
wantStatus = generateExecutionStartedStatus(updateRun, initialized)
10191019
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "")
10201020

@@ -1106,7 +1106,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() {
11061106
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())
11071107

11081108
By("Validating the initialization succeeded and the execution started")
1109-
initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, policySnapshot, updateStrategy)
1109+
initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy)
11101110
wantStatus = generateExecutionStartedStatus(updateRun, initialized)
11111111
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "")
11121112

@@ -1163,7 +1163,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() {
11631163
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())
11641164

11651165
By("Validating the initialization succeeded and the execution started")
1166-
initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, policySnapshot, updateStrategy)
1166+
initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy)
11671167
wantStatus = generateExecutionStartedStatus(updateRun, initialized)
11681168
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "")
11691169
})

pkg/controllers/updaterun/initialization.go

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -471,9 +471,8 @@ func validateAfterStageTask(tasks []placementv1beta1.StageTask) error {
471471
func (r *Reconciler) recordOverrideSnapshots(ctx context.Context, placementKey types.NamespacedName, updateRun placementv1beta1.UpdateRunObj) error {
472472
updateRunRef := klog.KObj(updateRun)
473473
updateRunSpec := updateRun.GetUpdateRunSpec()
474-
placementName := placementKey.Name
475474

476-
resourceSnapshotObjs, err := r.getResourceSnapshotObjs(ctx, updateRunSpec, placementName, placementKey, updateRunRef)
475+
resourceSnapshotObjs, err := r.getResourceSnapshotObjs(ctx, placementKey, updateRun)
477476
if err != nil {
478477
return err
479478
}
@@ -487,6 +486,7 @@ func (r *Reconciler) recordOverrideSnapshots(ctx context.Context, placementKey t
487486
break
488487
}
489488
}
489+
490490
// No masterResourceSnapshot found.
491491
if masterResourceSnapshot == nil {
492492
err := controller.NewUnexpectedBehaviorError(fmt.Errorf("no master resourceSnapshot found for placement %s", placementKey))
@@ -497,14 +497,10 @@ func (r *Reconciler) recordOverrideSnapshots(ctx context.Context, placementKey t
497497

498498
klog.V(2).InfoS("Found master resourceSnapshot", "placement", placementKey, "masterResourceSnapshot", masterResourceSnapshot.GetName(), "updateRun", updateRunRef)
499499

500-
// Update the resource snapshot name in the UpdateRun status.
500+
// Record the resource snapshot index used.
501501
updateRunStatus := updateRun.GetUpdateRunStatus()
502-
updateRunStatus.ResourceSnapshotName = masterResourceSnapshot.GetName()
503-
if updateErr := r.Client.Status().Update(ctx, updateRun); updateErr != nil {
504-
klog.ErrorS(updateErr, "Failed to update the UpdateRun status with resource snapshot name", "updateRun", klog.KObj(updateRun), "resourceSnapshot", klog.KObj(masterResourceSnapshot))
505-
// updateErr can be retried.
506-
return controller.NewUpdateIgnoreConflictError(updateErr)
507-
}
502+
updateRunStatus.ResourceSnapshotIndexUsed = masterResourceSnapshot.GetLabels()[placementv1beta1.ResourceIndexLabel]
503+
updateRun.SetUpdateRunStatus(*updateRunStatus)
508504

509505
resourceSnapshotRef := klog.KObj(masterResourceSnapshot)
510506
// Fetch all the matching overrides.
@@ -534,20 +530,24 @@ func (r *Reconciler) recordOverrideSnapshots(ctx context.Context, placementKey t
534530

535531
// getResourceSnapshotObjs retrieves the list of resource snapshot objects from the specified ResourceSnapshotIndex.
536532
// If ResourceSnapshotIndex is unspecified, it returns the list of latest resource snapshots.
537-
func (r *Reconciler) getResourceSnapshotObjs(ctx context.Context, updateRunSpec *placementv1beta1.UpdateRunSpec, placementName string, placementKey types.NamespacedName, updateRunRef klog.ObjectRef) ([]placementv1beta1.ResourceSnapshotObj, error) {
533+
func (r *Reconciler) getResourceSnapshotObjs(ctx context.Context, placementKey types.NamespacedName, updateRun placementv1beta1.UpdateRunObj) ([]placementv1beta1.ResourceSnapshotObj, error) {
534+
updateRunRef := klog.KObj(updateRun)
535+
updateRunSpec := updateRun.GetUpdateRunSpec()
538536
var resourceSnapshotObjs []placementv1beta1.ResourceSnapshotObj
539537
if updateRunSpec.ResourceSnapshotIndex != "" {
540538
snapshotIndex, err := strconv.Atoi(updateRunSpec.ResourceSnapshotIndex)
541539
if err != nil || snapshotIndex < 0 {
542540
err := controller.NewUserError(fmt.Errorf("invalid resource snapshot index `%s` provided, expected an integer >= 0", updateRunSpec.ResourceSnapshotIndex))
543541
klog.ErrorS(err, "Failed to parse the resource snapshot index", "updateRun", updateRunRef)
542+
// no more retries here.
544543
return nil, fmt.Errorf("%w: %s", errInitializedFailed, err.Error())
545544
}
546545

547-
resourceSnapshotList, err := controller.ListAllResourceSnapshotWithAnIndex(ctx, r.Client, updateRunSpec.ResourceSnapshotIndex, placementName, placementKey.Namespace)
546+
resourceSnapshotList, err := controller.ListAllResourceSnapshotWithAnIndex(ctx, r.Client, updateRunSpec.ResourceSnapshotIndex, placementKey.Name, placementKey.Namespace)
548547
if err != nil {
549548
klog.ErrorS(err, "Failed to list the resourceSnapshots associated with the placement",
550549
"placement", placementKey, "resourceSnapshotIndex", snapshotIndex, "updateRun", updateRunRef)
550+
// list err can be retried.
551551
return nil, controller.NewAPIServerError(true, err)
552552
}
553553

@@ -561,18 +561,20 @@ func (r *Reconciler) getResourceSnapshotObjs(ctx context.Context, updateRunSpec
561561
return resourceSnapshotObjs, nil
562562
}
563563

564+
klog.V(2).InfoS("No resource snapshot index specified, fetching latest resource snapshots", "placement", placementKey, "updateRun", updateRunRef)
564565
latestResourceSnapshots, err := controller.ListLatestResourceSnapshots(ctx, r.Client, placementKey)
565566
if err != nil {
566567
klog.ErrorS(err, "Failed to list the latest resourceSnapshots associated with the placement",
567568
"placement", placementKey, "updateRun", updateRunRef)
569+
// list err can be retried.
568570
return nil, controller.NewAPIServerError(true, err)
569571
}
570572

571573
resourceSnapshotObjs = latestResourceSnapshots.GetResourceSnapshotObjs()
572574
if len(resourceSnapshotObjs) == 0 {
573-
err := controller.NewUserError(fmt.Errorf("no resourceSnapshots found for placement `%s`", placementKey))
574-
klog.ErrorS(err, "No resourceSnapshots found", "updateRun", updateRunRef)
575-
// no more retries here.
575+
err := fmt.Errorf("no latest resourceSnapshots found for placement `%s`. This might be a transient state, need retry", placementKey)
576+
klog.ErrorS(err, "No latest resourceSnapshots found for placement. This might be transient, need retry", "placement", placementKey, "updateRun", updateRunRef)
577+
// retryable error.
576578
return resourceSnapshotObjs, fmt.Errorf("%w: %s", errInitializedFailed, err.Error())
577579
}
578580
return resourceSnapshotObjs, nil

0 commit comments

Comments
 (0)