Skip to content

Commit d1f199f

Browse files
committed
address comments
Signed-off-by: Britania Rodriguez Reyes <britaniar@microsoft.com>
1 parent c495d5c commit d1f199f

File tree

3 files changed

+298
-80
lines changed

3 files changed

+298
-80
lines changed

pkg/controllers/workgenerator/controller.go

Lines changed: 75 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req controllerruntime.Reques
155155

156156
workUpdated := false
157157
overrideSucceeded := false
158-
workDeleted := false
158+
workDeleted := make(map[string]*fleetv1beta1.Work)
159159
// list all the corresponding works
160160
works, syncErr := r.listAllWorksAssociated(ctx, &resourceBinding)
161161
if syncErr == nil {
@@ -221,10 +221,10 @@ func (r *Reconciler) Reconcile(ctx context.Context, req controllerruntime.Reques
221221
Message: "All of the works are synchronized to the latest",
222222
})
223223
switch {
224-
case workDeleted:
224+
case len(workDeleted) != 0:
225225
// Some Work object(s) are being deleted; set a False Applied condition which signals
226226
// that resources are in the process of being deleted.
227-
klog.V(2).InfoS("Some work being deleted", "resourceBinding", bindingRef)
227+
klog.V(2).InfoS("Some work being deleted", "resourceBinding", bindingRef, "works", workDeleted)
228228
setBindingStatus(workDeleted, works, &resourceBinding)
229229
syncErr = controller.NewUserError(fmt.Errorf("some work objects are being deleted"))
230230
case !workUpdated:
@@ -404,9 +404,7 @@ func (r *Reconciler) listAllWorksAssociated(ctx context.Context, resourceBinding
404404
return nil, controller.NewAPIServerError(true, err)
405405
}
406406
for _, work := range workList.Items {
407-
if work.DeletionTimestamp == nil {
408-
currentWork[work.Name] = work.DeepCopy()
409-
}
407+
currentWork[work.Name] = work.DeepCopy()
410408
}
411409
klog.V(2).InfoS("Get all the work associated", "numOfWork", len(currentWork), "resourceBinding", klog.KObj(resourceBinding))
412410
return currentWork, nil
@@ -416,9 +414,11 @@ func (r *Reconciler) listAllWorksAssociated(ctx context.Context, resourceBinding
416414
// it returns
417415
// 1: if we apply the overrides successfully
418416
// 2: if we actually made any changes on the hub cluster
419-
func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1beta1.ClusterResourceBinding, existingWorks map[string]*fleetv1beta1.Work, cluster *clusterv1beta1.MemberCluster) (bool, bool, bool, error) {
417+
// 3: the deleted work objects if any
418+
// 4: an error if any
419+
func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1beta1.ClusterResourceBinding, existingWorks map[string]*fleetv1beta1.Work, cluster *clusterv1beta1.MemberCluster) (bool, bool, map[string]*fleetv1beta1.Work, error) {
420420
updateAny := atomic.NewBool(false)
421-
deletedAny := atomic.NewBool(false)
421+
deletedWork := make(map[string]*fleetv1beta1.Work)
422422
resourceBindingRef := klog.KObj(resourceBinding)
423423

424424
// Refresh the apply strategy for all existing works.
@@ -443,17 +443,17 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
443443
})
444444
}
445445
if updateErr := errs.Wait(); updateErr != nil {
446-
return false, false, false, updateErr
446+
return false, false, deletedWork, updateErr
447447
}
448448

449449
// the hash256 function can handle empty list https://go.dev/play/p/_4HW17fooXM
450450
resourceOverrideSnapshotHash, err := resource.HashOf(resourceBinding.Spec.ResourceOverrideSnapshots)
451451
if err != nil {
452-
return false, false, false, controller.NewUnexpectedBehaviorError(err)
452+
return false, false, deletedWork, controller.NewUnexpectedBehaviorError(err)
453453
}
454454
clusterResourceOverrideSnapshotHash, err := resource.HashOf(resourceBinding.Spec.ClusterResourceOverrideSnapshots)
455455
if err != nil {
456-
return false, false, false, controller.NewUnexpectedBehaviorError(err)
456+
return false, false, deletedWork, controller.NewUnexpectedBehaviorError(err)
457457
}
458458
// TODO: check all work synced first before fetching the snapshots after we put ParentResourceOverrideSnapshotHashAnnotation and ParentClusterResourceOverrideSnapshotHashAnnotation in all the work objects
459459

@@ -464,22 +464,22 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
464464
// the resourceIndex is deleted but the works might still be up to date with the binding.
465465
if areAllWorkSynced(existingWorks, resourceBinding, resourceOverrideSnapshotHash, clusterResourceOverrideSnapshotHash) {
466466
klog.V(2).InfoS("All the works are synced with the resourceBinding even if the resource snapshot index is removed", "resourceBinding", resourceBindingRef)
467-
return true, updateAny.Load(), deletedAny.Load(), nil
467+
return true, updateAny.Load(), deletedWork, nil
468468
}
469-
return false, false, false, controller.NewUserError(err)
469+
return false, false, deletedWork, controller.NewUserError(err)
470470
}
471471
// TODO(RZ): handle errResourceNotFullyCreated error so we don't need to wait for all the snapshots to be created
472-
return false, false, false, err
472+
return false, false, deletedWork, err
473473
}
474474

475475
croMap, err := r.fetchClusterResourceOverrideSnapshots(ctx, resourceBinding)
476476
if err != nil {
477-
return false, false, false, err
477+
return false, false, deletedWork, err
478478
}
479479

480480
roMap, err := r.fetchResourceOverrideSnapshots(ctx, resourceBinding)
481481
if err != nil {
482-
return false, false, false, err
482+
return false, false, deletedWork, err
483483
}
484484

485485
// issue all the create/update requests for the corresponding works for each snapshot in parallel
@@ -492,15 +492,15 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
492492
workNamePrefix, err := getWorkNamePrefixFromSnapshotName(snapshot)
493493
if err != nil {
494494
klog.ErrorS(err, "Encountered a mal-formatted resource snapshot", "resourceSnapshot", klog.KObj(snapshot))
495-
return false, false, false, err
495+
return false, false, deletedWork, err
496496
}
497497
var simpleManifests []fleetv1beta1.Manifest
498498
for j := range snapshot.Spec.SelectedResources {
499499
selectedResource := snapshot.Spec.SelectedResources[j].DeepCopy()
500500
// TODO: override the content of the wrapped resource instead of the envelope itself
501501
resourceDeleted, overrideErr := r.applyOverrides(selectedResource, cluster, croMap, roMap)
502502
if overrideErr != nil {
503-
return false, false, false, overrideErr
503+
return false, false, deletedWork, overrideErr
504504
}
505505
if resourceDeleted {
506506
klog.V(2).InfoS("The resource is deleted by the override rules", "snapshot", klog.KObj(snapshot), "selectedResource", snapshot.Spec.SelectedResources[j])
@@ -511,14 +511,14 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
511511
var uResource unstructured.Unstructured
512512
if unMarshallErr := uResource.UnmarshalJSON(selectedResource.Raw); unMarshallErr != nil {
513513
klog.ErrorS(unMarshallErr, "work has invalid content", "snapshot", klog.KObj(snapshot), "selectedResource", selectedResource.Raw)
514-
return true, false, false, controller.NewUnexpectedBehaviorError(unMarshallErr)
514+
return true, false, deletedWork, controller.NewUnexpectedBehaviorError(unMarshallErr)
515515
}
516516
if uResource.GetObjectKind().GroupVersionKind() == utils.ConfigMapGVK &&
517517
len(uResource.GetAnnotations()[fleetv1beta1.EnvelopeConfigMapAnnotation]) != 0 {
518518
// get a work object for the enveloped configMap
519519
work, err := r.getConfigMapEnvelopWorkObj(ctx, workNamePrefix, resourceBinding, snapshot, &uResource, resourceOverrideSnapshotHash, clusterResourceOverrideSnapshotHash)
520520
if err != nil {
521-
return true, false, false, err
521+
return true, false, deletedWork, err
522522
}
523523
activeWork[work.Name] = work
524524
newWork = append(newWork, work)
@@ -559,24 +559,38 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
559559
continue
560560
}
561561
errs.Go(func() error {
562-
if err := r.Client.Delete(ctx, work); err != nil {
563-
if !apierrors.IsNotFound(err) {
564-
klog.ErrorS(err, "Failed to delete the no longer needed work", "work", klog.KObj(work))
565-
return controller.NewAPIServerError(false, err)
566-
}
562+
if err := r.deleteWork(cctx, work, updateAny); err != nil {
563+
klog.ErrorS(err, "Failed to delete the work", "work", klog.KObj(work))
564+
return controller.NewAPIServerError(false, err)
567565
}
568-
klog.V(2).InfoS("Deleted the work that is not associated with any resource snapshot", "work", klog.KObj(work))
569-
deletedAny.Store(true)
566+
deletedWork[work.Name] = work
570567
return nil
571568
})
572569
}
573570

574571
// wait for all the create/update/delete requests to finish
575572
if updateErr := errs.Wait(); updateErr != nil {
576-
return true, false, false, updateErr
573+
return true, false, deletedWork, updateErr
574+
}
575+
klog.V(2).InfoS("Successfully synced all the work associated with the resourceBinding", "updateAny", updateAny.Load(), "numOfDeletedWork", len(deletedWork), "resourceBinding", resourceBindingRef)
576+
return true, updateAny.Load(), deletedWork, nil
577+
}
578+
579+
// deleteWork deletes the work object if it is not associated with any resource snapshot.
580+
func (r *Reconciler) deleteWork(ctx context.Context, work *fleetv1beta1.Work, updateAny *atomic.Bool) error {
581+
if work.DeletionTimestamp == nil {
582+
if err := r.Client.Delete(ctx, work); err != nil {
583+
if !apierrors.IsNotFound(err) {
584+
klog.ErrorS(err, "Failed to delete the no longer needed work", "work", klog.KObj(work))
585+
return err
586+
}
587+
}
588+
klog.V(2).InfoS("Deleted the work that is not associated with any resource snapshot", "work", klog.KObj(work))
589+
} else {
590+
klog.V(2).InfoS("Work is in the process of being deleted", "work", klog.KObj(work), "deletionTimestamp", work.DeletionTimestamp)
591+
updateAny.Store(true)
577592
}
578-
klog.V(2).InfoS("Successfully synced all the work associated with the resourceBinding", "updateAny", updateAny.Load(), "deletedAny", deletedAny.Load(), "resourceBinding", resourceBindingRef)
579-
return true, updateAny.Load(), deletedAny.Load(), nil
593+
return nil
580594
}
581595

582596
// syncApplyStrategy syncs the apply strategy specified on a ClusterResourceBinding object
@@ -853,7 +867,7 @@ const (
853867
)
854868

855869
// setBindingStatus sets the binding status based on the works associated with the binding.
856-
func setBindingStatus(workDeleted bool, works map[string]*fleetv1beta1.Work, resourceBinding *fleetv1beta1.ClusterResourceBinding) {
870+
func setBindingStatus(workDeleted map[string]*fleetv1beta1.Work, works map[string]*fleetv1beta1.Work, resourceBinding *fleetv1beta1.ClusterResourceBinding) {
857871
bindingRef := klog.KObj(resourceBinding)
858872

859873
// Note (chenyu1): the work generator will refresh the status of a ClusterResourceBinding using
@@ -1015,7 +1029,7 @@ func setBindingStatus(workDeleted bool, works map[string]*fleetv1beta1.Work, res
10151029
//
10161030
// The Applied condition of a ClusterResourceBinding object is set to True if and only if all the
10171031
// related Work objects have their Applied condition set to True.
1018-
func setAllWorkAppliedCondition(workDeleted bool, works map[string]*fleetv1beta1.Work, binding *fleetv1beta1.ClusterResourceBinding) workConditionSummarizedStatus {
1032+
func setAllWorkAppliedCondition(workDeleted map[string]*fleetv1beta1.Work, works map[string]*fleetv1beta1.Work, binding *fleetv1beta1.ClusterResourceBinding) workConditionSummarizedStatus {
10191033
// Fleet here makes a clear distinction between incomplete, failed, and successful apply operations.
10201034
// This is to ensure that stale apply information (esp. those set before
10211035
// an apply strategy change) will not leak into the current apply operations.
@@ -1025,35 +1039,45 @@ func setAllWorkAppliedCondition(workDeleted bool, works map[string]*fleetv1beta1
10251039
var firstWorkWithIncompleteApplyOp *fleetv1beta1.Work
10261040
var firstWorkWithFailedApplyOp *fleetv1beta1.Work
10271041

1028-
for _, w := range works {
1029-
applyCond := meta.FindStatusCondition(w.Status.Conditions, fleetv1beta1.WorkConditionTypeApplied)
1030-
switch {
1031-
case condition.IsConditionStatusTrue(applyCond, w.GetGeneration()):
1032-
// The Work object has completed the apply op successfully.
1033-
case condition.IsConditionStatusFalse(applyCond, w.GetGeneration()):
1034-
// An error has occurred during the apply op.
1035-
areAllWorksApplyOpsSuccessful = false
1036-
if firstWorkWithFailedApplyOp == nil {
1037-
firstWorkWithFailedApplyOp = w
1038-
}
1039-
default:
1040-
// The Work object has not yet completed the apply op.
1041-
areAllWorksApplyOpsCompleted = false
1042-
if firstWorkWithIncompleteApplyOp == nil {
1043-
firstWorkWithIncompleteApplyOp = w
1042+
var firstWorkDeleted *fleetv1beta1.Work
1043+
1044+
for _, w := range workDeleted {
1045+
if firstWorkDeleted == nil {
1046+
firstWorkDeleted = w
1047+
}
1048+
}
1049+
1050+
if firstWorkDeleted == nil {
1051+
for _, w := range works {
1052+
applyCond := meta.FindStatusCondition(w.Status.Conditions, fleetv1beta1.WorkConditionTypeApplied)
1053+
switch {
1054+
case condition.IsConditionStatusTrue(applyCond, w.GetGeneration()):
1055+
// The Work object has completed the apply op successfully.
1056+
case condition.IsConditionStatusFalse(applyCond, w.GetGeneration()):
1057+
// An error has occurred during the apply op.
1058+
areAllWorksApplyOpsSuccessful = false
1059+
if firstWorkWithFailedApplyOp == nil {
1060+
firstWorkWithFailedApplyOp = w
1061+
}
1062+
default:
1063+
// The Work object has not yet completed the apply op.
1064+
areAllWorksApplyOpsCompleted = false
1065+
if firstWorkWithIncompleteApplyOp == nil {
1066+
firstWorkWithIncompleteApplyOp = w
1067+
}
10441068
}
10451069
}
10461070
}
10471071

10481072
switch {
1049-
case workDeleted:
1073+
case len(workDeleted) != 0:
10501074
// Some work objects are being deleted.
1051-
klog.V(2).InfoS("Some works have not yet completed the apply op as some are deleting", "binding", klog.KObj(binding))
1075+
klog.V(2).InfoS("Some works have not yet completed the apply op as some are deleting", "binding", klog.KObj(binding), "firstWorkDeleted", klog.KObj(firstWorkDeleted))
10521076
binding.SetConditions(metav1.Condition{
10531077
Status: metav1.ConditionFalse,
10541078
Type: string(fleetv1beta1.ResourceBindingApplied),
10551079
Reason: condition.WorkNotAppliedReason,
1056-
Message: "Some work objects have been deleted. Some works have not yet completed the apply op.",
1080+
Message: fmt.Sprintf("Some work objects have been deleted. Work object %s is deleting", firstWorkDeleted.Name),
10571081
ObservedGeneration: binding.GetGeneration(),
10581082
})
10591083
return workConditionSummarizedStatusFalse

0 commit comments

Comments
 (0)