Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 74 additions & 15 deletions pkg/controllers/updaterun/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,18 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
defer emitUpdateRunStatusMetric(updateRun)

state := updateRun.GetUpdateRunSpec().State
updateRunStatus := updateRun.GetUpdateRunStatus()
if state == placementv1beta1.StateAbandoned {
succeedCond := meta.FindStatusCondition(updateRunStatus.Conditions, string(placementv1beta1.StagedUpdateRunConditionSucceeded))
if succeedCond != nil && succeedCond.Reason == condition.UpdateRunAbandonedReason {
// Terminal state reached as updateRun cannot be restarted after being abandoned.
klog.V(2).InfoS("The updateRun is abandoned, terminating", "state", state, "updateRun", runObjRef)
return runtime.Result{}, nil
}
}

var updatingStageIndex int
var toBeUpdatedBindings, toBeDeletedBindings []placementv1beta1.BindingObj
updateRunStatus := updateRun.GetUpdateRunStatus()
initCond := meta.FindStatusCondition(updateRunStatus.Conditions, string(placementv1beta1.StagedUpdateRunConditionInitialized))
// Check if initialized regardless of generation.
// The updateRun spec fields are immutable except for the state field. When the state changes,
Expand Down Expand Up @@ -158,35 +166,60 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, updateRun)
}

// Execute the updateRun.
if state == placementv1beta1.StateExecuted {
klog.V(2).InfoS("Continue to execute the updateRun", "state", state, "updatingStageIndex", updatingStageIndex, "updateRun", runObjRef)
switch state {
case placementv1beta1.StateInitialized:
klog.V(2).InfoS("The updateRun is initialized but not executed, waiting to execute", "state", state, "updateRun", runObjRef)
return runtime.Result{}, nil
case placementv1beta1.StateExecuted:
// Execute the updateRun.
klog.V(2).InfoS("Continue to execute the updateRun", "updatingStageIndex", updatingStageIndex, "updateRun", runObjRef)
finished, waitTime, execErr := r.execute(ctx, updateRun, updatingStageIndex, toBeUpdatedBindings, toBeDeletedBindings)
if errors.Is(execErr, errStagedUpdatedAborted) {
// errStagedUpdatedAborted cannot be retried.
return runtime.Result{}, r.recordUpdateRunFailed(ctx, updateRun, execErr.Error())
}

if finished {
klog.V(2).InfoS("The updateRun is completed", "updateRun", runObjRef)
return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, updateRun)
}

// The execution is not finished yet or it encounters a retriable error.
// We need to record the status and requeue.
if updateErr := r.recordUpdateRunStatus(ctx, updateRun); updateErr != nil {
return runtime.Result{}, updateErr
return r.handleIncompleteUpdateRun(ctx, updateRun, waitTime, execErr, state, runObjRef)
case placementv1beta1.StateAbandoned:
// Abandon the updateRun.
klog.V(2).InfoS("Abandoning the updateRun", "state", state, "updatingStageIndex", updatingStageIndex, "updateRun", runObjRef)
finished, waitTime, execErr := r.abandon(updateRun, updatingStageIndex, toBeUpdatedBindings, toBeDeletedBindings)
if errors.Is(execErr, errStagedUpdatedAborted) {
// errStagedUpdatedAborted cannot be retried.
return runtime.Result{}, r.recordUpdateRunFailed(ctx, updateRun, execErr.Error())
}
klog.V(2).InfoS("The updateRun is not finished yet", "requeueWaitTime", waitTime, "execErr", execErr, "updateRun", runObjRef)
if execErr != nil {
return runtime.Result{}, execErr
if finished {
klog.V(2).InfoS("The updateRun is abandoned", "updateRun", runObjRef)
return runtime.Result{}, r.recordUpdateRunAbandoned(ctx, updateRun)
}
return runtime.Result{Requeue: true, RequeueAfter: waitTime}, nil
return r.handleIncompleteUpdateRun(ctx, updateRun, waitTime, execErr, state, runObjRef)
}
klog.V(2).InfoS("The updateRun is initialized but not executed, waiting to execute", "state", state, "updateRun", runObjRef)
return runtime.Result{}, nil
}

// handleIncompleteUpdateRun persists the in-memory updateRun status and produces the
// reconcile result for a run that has neither finished nor hit a non-retriable error.
//
// If the status cannot be recorded, that error is returned immediately. Otherwise a
// progress line is logged for the Executed and Abandoned states (no line is logged for
// any other state), and then:
//   - a non-nil execErr is returned to the caller with an empty result;
//   - a nil execErr yields a result that requeues the request after waitTime.
func (r *Reconciler) handleIncompleteUpdateRun(ctx context.Context, updateRun placementv1beta1.UpdateRunObj, waitTime time.Duration, execErr error, state placementv1beta1.State, runObjRef klog.ObjectRef) (runtime.Result, error) {
	// Record the status before anything else; a failure here takes precedence over execErr.
	statusErr := r.recordUpdateRunStatus(ctx, updateRun)
	if statusErr != nil {
		return runtime.Result{}, statusErr
	}

	// The log message depends on whether the run is being executed or abandoned.
	if state == placementv1beta1.StateExecuted {
		klog.V(2).InfoS("The updateRun is not finished yet", "state", state, "requeueWaitTime", waitTime, "execErr", execErr, "updateRun", runObjRef)
	} else if state == placementv1beta1.StateAbandoned {
		klog.V(2).InfoS("The updateRun is not finished abandoning yet", "state", state, "requeueWaitTime", waitTime, "execErr", execErr, "updateRun", runObjRef)
	}

	// No execution error: come back after the requested wait time.
	if execErr == nil {
		return runtime.Result{Requeue: true, RequeueAfter: waitTime}, nil
	}
	return runtime.Result{}, execErr
}

// handleDelete handles the deletion of the updateRun object.
// We delete all the dependent resources, including approvalRequest objects, of the updateRun object.
func (r *Reconciler) handleDelete(ctx context.Context, updateRun placementv1beta1.UpdateRunObj) (bool, time.Duration, error) {
Expand Down Expand Up @@ -277,6 +310,32 @@ func (r *Reconciler) recordUpdateRunFailed(ctx context.Context, updateRun placem
return nil
}

// recordUpdateRunAbandoned marks the updateRun as abandoned and writes the status.
// It sets both the Progressing and the Succeeded conditions to False with reason
// condition.UpdateRunAbandonedReason, stamped with the object's current generation,
// and then updates the status through r.Client.Status().
// It returns nil on success. A failed status update is logged and returned wrapped by
// controller.NewUpdateIgnoreConflictError.
func (r *Reconciler) recordUpdateRunAbandoned(ctx context.Context, updateRun placementv1beta1.UpdateRunObj) error {
updateRunStatus := updateRun.GetUpdateRunStatus()
// Progressing=False: the run is no longer making progress because it was abandoned.
meta.SetStatusCondition(&updateRunStatus.Conditions, metav1.Condition{
Type: string(placementv1beta1.StagedUpdateRunConditionProgressing),
Status: metav1.ConditionFalse,
ObservedGeneration: updateRun.GetGeneration(),
Reason: condition.UpdateRunAbandonedReason,
Message: "The update run has been abandoned",
})
// Succeeded=False with the abandoned reason. SetStatusCondition replaces an existing
// condition of the same type, so a previously recorded Succeeded condition is overwritten here.
// NOTE(review): whether an already-succeeded run can reach this point depends on checks in the caller — confirm.
meta.SetStatusCondition(&updateRunStatus.Conditions, metav1.Condition{
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if an updateRun is already Succeeded, and then user tries to abandon the updateRun ?

Type: string(placementv1beta1.StagedUpdateRunConditionSucceeded),
Status: metav1.ConditionFalse,
ObservedGeneration: updateRun.GetGeneration(),
Reason: condition.UpdateRunAbandonedReason,
Message: "The update run has been abandoned",
})

// Write the updated conditions through the status client.
if updateErr := r.Client.Status().Update(ctx, updateRun); updateErr != nil {
klog.ErrorS(updateErr, "Failed to update the updateRun status as abandoned", "updateRun", klog.KObj(updateRun))
// updateErr can be retried.
return controller.NewUpdateIgnoreConflictError(updateErr)
}
return nil
}

// recordUpdateRunStatus records the updateRun status.
func (r *Reconciler) recordUpdateRunStatus(ctx context.Context, updateRun placementv1beta1.UpdateRunObj) error {
if updateErr := r.Client.Status().Update(ctx, updateRun); updateErr != nil {
Expand Down
26 changes: 26 additions & 0 deletions pkg/controllers/updaterun/controller_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,26 @@ func generateWaitingMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *
}
}

// generateAbandoningMetric builds the metric expected while the update run is being
// abandoned: labels for the Progressing condition with status False and reason
// UpdateRunAbandoningReason, and a gauge holding the current time in seconds.
func generateAbandoningMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
	labels := generateMetricsLabels(
		updateRun,
		string(placementv1beta1.StagedUpdateRunConditionProgressing),
		string(metav1.ConditionFalse),
		condition.UpdateRunAbandoningReason,
	)
	// Current wall-clock time expressed as fractional seconds since the Unix epoch.
	nowSeconds := float64(time.Now().UnixNano()) / 1e9
	gauge := &prometheusclientmodel.Gauge{Value: ptr.To(nowSeconds)}
	return &prometheusclientmodel.Metric{
		Label: labels,
		Gauge: gauge,
	}
}

// generateAbandonedMetric builds the metric expected once the update run has been
// abandoned: labels for the Succeeded condition with status False and reason
// UpdateRunAbandonedReason, and a gauge holding the current time in seconds.
func generateAbandonedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
	labels := generateMetricsLabels(
		updateRun,
		string(placementv1beta1.StagedUpdateRunConditionSucceeded),
		string(metav1.ConditionFalse),
		condition.UpdateRunAbandonedReason,
	)
	// Current wall-clock time expressed as fractional seconds since the Unix epoch.
	nowSeconds := float64(time.Now().UnixNano()) / 1e9
	gauge := &prometheusclientmodel.Gauge{Value: ptr.To(nowSeconds)}
	return &prometheusclientmodel.Metric{
		Label: labels,
		Gauge: gauge,
	}
}

func generateStuckMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
return &prometheusclientmodel.Metric{
Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing),
Expand Down Expand Up @@ -823,3 +843,9 @@ func generateFalseProgressingCondition(obj client.Object, condType any, reason s
falseCond.Reason = reason
return falseCond
}

// generateFalseSucceededCondition returns the condition produced by generateFalseCondition
// for obj and condType, with its Reason replaced by the given reason.
func generateFalseSucceededCondition(obj client.Object, condType any, reason string) metav1.Condition {
	cond := generateFalseCondition(obj, condType)
	cond.Reason = reason
	return cond
}
12 changes: 9 additions & 3 deletions pkg/controllers/updaterun/execution.go
Original file line number Diff line number Diff line change
Expand Up @@ -564,8 +564,14 @@ func aggregateUpdateRunStatus(updateRun placementv1beta1.UpdateRunObj, stageName
if len(stuckClusterNames) > 0 {
markUpdateRunStuck(updateRun, stageName, strings.Join(stuckClusterNames, ", "))
} else {
// If there is no stuck cluster but some progress has been made, mark the update run as progressing.
markUpdateRunProgressing(updateRun)
switch updateRun.GetUpdateRunSpec().State {
case placementv1beta1.StateAbandoned:
// If the update run is being abandoned, mark it as abandoning.
markUpdateRunAbandoning(updateRun)
default:
// If there is no stuck cluster but some progress has been made, mark the update run as progressing.
markUpdateRunProgressing(updateRun)
}
}
}

Expand Down Expand Up @@ -669,7 +675,7 @@ func markUpdateRunProgressing(updateRun placementv1beta1.UpdateRunObj) {
})
}

// markUpdateRunProgressingIfNotWaitingOrStuck marks the update run as proegressing in memory if it's not marked as waiting or stuck already.
// markUpdateRunProgressingIfNotWaitingOrStuck marks the update run as progressing in memory if it's not marked as waiting or stuck already.
func markUpdateRunProgressingIfNotWaitingOrStuck(updateRun placementv1beta1.UpdateRunObj) {
updateRunStatus := updateRun.GetUpdateRunStatus()
progressingCond := meta.FindStatusCondition(updateRunStatus.Conditions, string(placementv1beta1.StagedUpdateRunConditionProgressing))
Expand Down
117 changes: 117 additions & 0 deletions pkg/controllers/updaterun/execution_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,123 @@ var _ = Describe("UpdateRun execution tests - double stages", func() {
validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun), generateStuckMetric(updateRun), generateFailedMetric(updateRun))
})
})

// Ordered scenario: an update run is created in the Executed state, approved, starts rolling out
// the 1st stage, and is then switched to Abandoned. The specs below run in order and share
// wantStatus, wantApprovalRequest and wantMetrics, each step extending the expectations of the previous one.
Context("Cluster staged update run should finish current updating clusters when abandoned", Ordered, func() {
// Expected before-stage approval request, built in BeforeAll and approved in the first spec.
var wantApprovalRequest *placementv1beta1.ClusterApprovalRequest
// Accumulated list of metrics expected so far; each step appends the metric it adds.
var wantMetrics []*promclient.Metric
BeforeAll(func() {
By("Creating a new clusterStagedUpdateRun")
updateRun.Spec.State = placementv1beta1.StateExecuted
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())

By("Validating the initialization succeeded and the execution has not started")
initialized := generateSucceededInitializationStatus(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy, clusterResourceOverride)
wantStatus = generateExecutionNotStartedStatus(updateRun, initialized)
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "")

By("Validating the first beforeStage approvalRequest has been created")
// The expected request is derived from the first stage's before-stage task in the updateRun status.
wantApprovalRequest = &placementv1beta1.ClusterApprovalRequest{
ObjectMeta: metav1.ObjectMeta{
Name: updateRun.Status.StagesStatus[0].BeforeStageTaskStatus[0].ApprovalRequestName,
Labels: map[string]string{
placementv1beta1.TargetUpdatingStageNameLabel: updateRun.Status.StagesStatus[0].StageName,
placementv1beta1.TargetUpdateRunLabel: updateRun.Name,
placementv1beta1.IsLatestUpdateRunApprovalLabel: "true",
},
},
Spec: placementv1beta1.ApprovalRequestSpec{
TargetUpdateRun: updateRun.Name,
TargetStage: updateRun.Status.StagesStatus[0].StageName,
},
}
validateApprovalRequestCreated(wantApprovalRequest)

By("Checking update run status metrics are emitted")
wantMetrics = []*promclient.Metric{generateWaitingMetric(updateRun)}
validateUpdateRunMetricsEmitted(wantMetrics...)
})

It("Should accept the approval request and start to rollout 1st stage", func() {
By("Approving the approvalRequest")
approveClusterApprovalRequest(ctx, wantApprovalRequest.Name)

By("Validating the approvalRequest has ApprovalAccepted status")
// Poll until the ApprovalAccepted condition is true for the request's current generation.
Eventually(func() (bool, error) {
var approvalRequest placementv1beta1.ClusterApprovalRequest
if err := k8sClient.Get(ctx, types.NamespacedName{Name: wantApprovalRequest.Name}, &approvalRequest); err != nil {
return false, err
}
return condition.IsConditionStatusTrue(meta.FindStatusCondition(approvalRequest.Status.Conditions, string(placementv1beta1.ApprovalRequestConditionApprovalAccepted)), approvalRequest.Generation), nil
}, timeout, interval).Should(BeTrue(), "failed to validate the approvalRequest approval accepted")
// Approval task has been approved.
wantStatus.StagesStatus[0].BeforeStageTaskStatus[0].Conditions = append(wantStatus.StagesStatus[0].BeforeStageTaskStatus[0].Conditions,
generateTrueCondition(updateRun, placementv1beta1.StageTaskConditionApprovalRequestApproved))
})

It("Should mark the 1st cluster in the 1st stage as succeeded after marking the binding available", func() {
By("Validating the 1st clusterResourceBinding is updated to Bound")
binding := resourceBindings[numTargetClusters-1] // cluster-9
validateBindingState(ctx, binding, resourceSnapshot.Name, updateRun, 0)

By("Updating the 1st clusterResourceBinding to Available")
meta.SetStatusCondition(&binding.Status.Conditions, generateTrueCondition(binding, placementv1beta1.ResourceBindingAvailable))
Expect(k8sClient.Status().Update(ctx, binding)).Should(Succeed(), "failed to update the binding status")

// 1st stage started.
wantStatus = generateExecutionStartedStatus(updateRun, wantStatus)

By("Validating the 1st cluster has succeeded and 2nd cluster has started")
wantStatus.StagesStatus[0].Clusters[0].Conditions = append(wantStatus.StagesStatus[0].Clusters[0].Conditions, generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded))
wantStatus.StagesStatus[0].Clusters[1].Conditions = append(wantStatus.StagesStatus[0].Clusters[1].Conditions, generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionStarted))
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "")

By("Validating the 1st stage has startTime set")
Expect(updateRun.Status.StagesStatus[0].StartTime).ShouldNot(BeNil())

By("Checking update run status metrics are emitted")
wantMetrics = append(wantMetrics, generateProgressingMetric(updateRun))
validateUpdateRunMetricsEmitted(wantMetrics...)
})

It("Should start abandoning the update run when state is Abandon", func() {
By("Updating updateRun state to Abandon")
// Flip the spec state while the 2nd cluster of the 1st stage is still marked as started.
updateRun.Spec.State = placementv1beta1.StateAbandoned
Expect(k8sClient.Update(ctx, updateRun)).Should(Succeed(), "failed to update the updateRun state")

By("Validating update run is abandoning")
// Mark updateRun progressing condition as false with abandoning reason.
meta.SetStatusCondition(&wantStatus.StagesStatus[0].Conditions, generateFalseProgressingCondition(updateRun, placementv1beta1.StageUpdatingConditionProgressing, condition.StageUpdatingAbandoningReason))
meta.SetStatusCondition(&wantStatus.Conditions, generateFalseProgressingCondition(updateRun, placementv1beta1.StagedUpdateRunConditionProgressing, condition.UpdateRunAbandoningReason))
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "")

By("Checking update run status metrics are emitted")
wantMetrics = append(wantMetrics, generateAbandoningMetric(updateRun))
validateUpdateRunMetricsEmitted(wantMetrics...)
})

It("Should mark the 2nd cluster in the 1st stage as succeeded after abandoning the updateRun", func() {
By("Validating the 2nd clusterResourceBinding is updated to Bound")
binding := resourceBindings[numTargetClusters-3] // cluster-7
validateBindingState(ctx, binding, resourceSnapshot.Name, updateRun, 0)

By("Updating the 2nd clusterResourceBinding to Available")
meta.SetStatusCondition(&binding.Status.Conditions, generateTrueCondition(binding, placementv1beta1.ResourceBindingAvailable))
Expect(k8sClient.Status().Update(ctx, binding)).Should(Succeed(), "failed to update the binding status")

By("Validating the 2nd cluster has succeeded")
// Mark 2nd cluster as succeeded.
wantStatus.StagesStatus[0].Clusters[1].Conditions = append(wantStatus.StagesStatus[0].Clusters[1].Conditions, generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded))
// Mark updateRun as abandoned.
// The stage and run Progressing conditions move from the abandoning reason to the abandoned reason,
// and the run-level Succeeded condition is expected to be False with the abandoned reason.
meta.SetStatusCondition(&wantStatus.StagesStatus[0].Conditions, generateFalseProgressingCondition(updateRun, placementv1beta1.StageUpdatingConditionProgressing, condition.StageUpdatingAbandonedReason))
meta.SetStatusCondition(&wantStatus.Conditions, generateFalseProgressingCondition(updateRun, placementv1beta1.StagedUpdateRunConditionProgressing, condition.UpdateRunAbandonedReason))
meta.SetStatusCondition(&wantStatus.Conditions, generateFalseSucceededCondition(updateRun, placementv1beta1.StagedUpdateRunConditionSucceeded, condition.UpdateRunAbandonedReason))
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "")

By("Checking update run status metrics are emitted")
wantMetrics = append(wantMetrics, generateAbandonedMetric(updateRun))
validateUpdateRunMetricsEmitted(wantMetrics...)
})
})
})

var _ = Describe("UpdateRun execution tests - single stage", func() {
Expand Down
Loading
Loading