@@ -155,7 +155,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req controllerruntime.Reques
155155
156156 workUpdated := false
157157 overrideSucceeded := false
158- workDeleted := false
158+ workDeleted := make ( map [ string ] * fleetv1beta1. Work )
159159 // list all the corresponding works
160160 works , syncErr := r .listAllWorksAssociated (ctx , & resourceBinding )
161161 if syncErr == nil {
@@ -221,10 +221,10 @@ func (r *Reconciler) Reconcile(ctx context.Context, req controllerruntime.Reques
221221 Message : "All of the works are synchronized to the latest" ,
222222 })
223223 switch {
224- case workDeleted :
224+ case len ( workDeleted ) != 0 :
225225 // Some Work object(s) are being deleted; set a False Applied condition which signals
226226 // that resources are in the process of being deleted.
227- klog .V (2 ).InfoS ("Some work being deleted" , "resourceBinding" , bindingRef )
227+ klog .V (2 ).InfoS ("Some work being deleted" , "resourceBinding" , bindingRef , "works" , workDeleted )
228228 setBindingStatus (workDeleted , works , & resourceBinding )
229229 syncErr = controller .NewUserError (fmt .Errorf ("some work objects are being deleted" ))
230230 case ! workUpdated :
@@ -404,9 +404,7 @@ func (r *Reconciler) listAllWorksAssociated(ctx context.Context, resourceBinding
404404 return nil , controller .NewAPIServerError (true , err )
405405 }
406406 for _ , work := range workList .Items {
407- if work .DeletionTimestamp == nil {
408- currentWork [work .Name ] = work .DeepCopy ()
409- }
407+ currentWork [work .Name ] = work .DeepCopy ()
410408 }
411409 klog .V (2 ).InfoS ("Get all the work associated" , "numOfWork" , len (currentWork ), "resourceBinding" , klog .KObj (resourceBinding ))
412410 return currentWork , nil
@@ -416,9 +414,11 @@ func (r *Reconciler) listAllWorksAssociated(ctx context.Context, resourceBinding
416414// it returns
417415// 1: if we apply the overrides successfully
418416// 2: if we actually made any changes on the hub cluster
419- func (r * Reconciler ) syncAllWork (ctx context.Context , resourceBinding * fleetv1beta1.ClusterResourceBinding , existingWorks map [string ]* fleetv1beta1.Work , cluster * clusterv1beta1.MemberCluster ) (bool , bool , bool , error ) {
417+ // 3: the deleted work objects if any
418+ // 4: an error if any
419+ func (r * Reconciler ) syncAllWork (ctx context.Context , resourceBinding * fleetv1beta1.ClusterResourceBinding , existingWorks map [string ]* fleetv1beta1.Work , cluster * clusterv1beta1.MemberCluster ) (bool , bool , map [string ]* fleetv1beta1.Work , error ) {
420420 updateAny := atomic .NewBool (false )
421- deletedAny := atomic . NewBool ( false )
421+ deletedWork := make ( map [ string ] * fleetv1beta1. Work )
422422 resourceBindingRef := klog .KObj (resourceBinding )
423423
424424 // Refresh the apply strategy for all existing works.
@@ -443,17 +443,17 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
443443 })
444444 }
445445 if updateErr := errs .Wait (); updateErr != nil {
446- return false , false , false , updateErr
446+ return false , false , deletedWork , updateErr
447447 }
448448
449449 // the hash256 function can handle empty list https://go.dev/play/p/_4HW17fooXM
450450 resourceOverrideSnapshotHash , err := resource .HashOf (resourceBinding .Spec .ResourceOverrideSnapshots )
451451 if err != nil {
452- return false , false , false , controller .NewUnexpectedBehaviorError (err )
452+ return false , false , deletedWork , controller .NewUnexpectedBehaviorError (err )
453453 }
454454 clusterResourceOverrideSnapshotHash , err := resource .HashOf (resourceBinding .Spec .ClusterResourceOverrideSnapshots )
455455 if err != nil {
456- return false , false , false , controller .NewUnexpectedBehaviorError (err )
456+ return false , false , deletedWork , controller .NewUnexpectedBehaviorError (err )
457457 }
458458 // TODO: check all work synced first before fetching the snapshots after we put ParentResourceOverrideSnapshotHashAnnotation and ParentClusterResourceOverrideSnapshotHashAnnotation in all the work objects
459459
@@ -464,22 +464,22 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
464464 // the resourceIndex is deleted but the works might still be up to date with the binding.
465465 if areAllWorkSynced (existingWorks , resourceBinding , resourceOverrideSnapshotHash , clusterResourceOverrideSnapshotHash ) {
466466 klog .V (2 ).InfoS ("All the works are synced with the resourceBinding even if the resource snapshot index is removed" , "resourceBinding" , resourceBindingRef )
467- return true , updateAny .Load (), deletedAny . Load () , nil
467+ return true , updateAny .Load (), deletedWork , nil
468468 }
469- return false , false , false , controller .NewUserError (err )
469+ return false , false , deletedWork , controller .NewUserError (err )
470470 }
471471 // TODO(RZ): handle errResourceNotFullyCreated error so we don't need to wait for all the snapshots to be created
472- return false , false , false , err
472+ return false , false , deletedWork , err
473473 }
474474
475475 croMap , err := r .fetchClusterResourceOverrideSnapshots (ctx , resourceBinding )
476476 if err != nil {
477- return false , false , false , err
477+ return false , false , deletedWork , err
478478 }
479479
480480 roMap , err := r .fetchResourceOverrideSnapshots (ctx , resourceBinding )
481481 if err != nil {
482- return false , false , false , err
482+ return false , false , deletedWork , err
483483 }
484484
485485 // issue all the create/update requests for the corresponding works for each snapshot in parallel
@@ -492,15 +492,15 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
492492 workNamePrefix , err := getWorkNamePrefixFromSnapshotName (snapshot )
493493 if err != nil {
494494 klog .ErrorS (err , "Encountered a mal-formatted resource snapshot" , "resourceSnapshot" , klog .KObj (snapshot ))
495- return false , false , false , err
495+ return false , false , deletedWork , err
496496 }
497497 var simpleManifests []fleetv1beta1.Manifest
498498 for j := range snapshot .Spec .SelectedResources {
499499 selectedResource := snapshot .Spec .SelectedResources [j ].DeepCopy ()
500500 // TODO: override the content of the wrapped resource instead of the envelope itself
501501 resourceDeleted , overrideErr := r .applyOverrides (selectedResource , cluster , croMap , roMap )
502502 if overrideErr != nil {
503- return false , false , false , overrideErr
503+ return false , false , deletedWork , overrideErr
504504 }
505505 if resourceDeleted {
506506 klog .V (2 ).InfoS ("The resource is deleted by the override rules" , "snapshot" , klog .KObj (snapshot ), "selectedResource" , snapshot .Spec .SelectedResources [j ])
@@ -511,14 +511,14 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
511511 var uResource unstructured.Unstructured
512512 if unMarshallErr := uResource .UnmarshalJSON (selectedResource .Raw ); unMarshallErr != nil {
513513 klog .ErrorS (unMarshallErr , "work has invalid content" , "snapshot" , klog .KObj (snapshot ), "selectedResource" , selectedResource .Raw )
514- return true , false , false , controller .NewUnexpectedBehaviorError (unMarshallErr )
514+ return true , false , deletedWork , controller .NewUnexpectedBehaviorError (unMarshallErr )
515515 }
516516 if uResource .GetObjectKind ().GroupVersionKind () == utils .ConfigMapGVK &&
517517 len (uResource .GetAnnotations ()[fleetv1beta1 .EnvelopeConfigMapAnnotation ]) != 0 {
518518 // get a work object for the enveloped configMap
519519 work , err := r .getConfigMapEnvelopWorkObj (ctx , workNamePrefix , resourceBinding , snapshot , & uResource , resourceOverrideSnapshotHash , clusterResourceOverrideSnapshotHash )
520520 if err != nil {
521- return true , false , false , err
521+ return true , false , deletedWork , err
522522 }
523523 activeWork [work .Name ] = work
524524 newWork = append (newWork , work )
@@ -559,24 +559,38 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
559559 continue
560560 }
561561 errs .Go (func () error {
562- if err := r .Client .Delete (ctx , work ); err != nil {
563- if ! apierrors .IsNotFound (err ) {
564- klog .ErrorS (err , "Failed to delete the no longer needed work" , "work" , klog .KObj (work ))
565- return controller .NewAPIServerError (false , err )
566- }
562+ if err := r .deleteWork (cctx , work , updateAny ); err != nil {
563+ klog .ErrorS (err , "Failed to delete the work" , "work" , klog .KObj (work ))
564+ return controller .NewAPIServerError (false , err )
567565 }
568- klog .V (2 ).InfoS ("Deleted the work that is not associated with any resource snapshot" , "work" , klog .KObj (work ))
569- deletedAny .Store (true )
566+ deletedWork [work .Name ] = work
570567 return nil
571568 })
572569 }
573570
574571 // wait for all the create/update/delete requests to finish
575572 if updateErr := errs .Wait (); updateErr != nil {
576- return true , false , false , updateErr
573+ return true , false , deletedWork , updateErr
574+ }
575+ klog .V (2 ).InfoS ("Successfully synced all the work associated with the resourceBinding" , "updateAny" , updateAny .Load (), "numOfDeletedWork" , len (deletedWork ), "resourceBinding" , resourceBindingRef )
576+ return true , updateAny .Load (), deletedWork , nil
577+ }
578+
579+ // deleteWork deletes the work object if it is not associated with any resource snapshot.
580+ func (r * Reconciler ) deleteWork (ctx context.Context , work * fleetv1beta1.Work , updateAny * atomic.Bool ) error {
581+ if work .DeletionTimestamp == nil {
582+ if err := r .Client .Delete (ctx , work ); err != nil {
583+ if ! apierrors .IsNotFound (err ) {
584+ klog .ErrorS (err , "Failed to delete the no longer needed work" , "work" , klog .KObj (work ))
585+ return err
586+ }
587+ }
588+ klog .V (2 ).InfoS ("Deleted the work that is not associated with any resource snapshot" , "work" , klog .KObj (work ))
589+ } else {
590+ klog .V (2 ).InfoS ("Work is in the process of being deleted" , "work" , klog .KObj (work ), "deletionTimestamp" , work .DeletionTimestamp )
591+ updateAny .Store (true )
577592 }
578- klog .V (2 ).InfoS ("Successfully synced all the work associated with the resourceBinding" , "updateAny" , updateAny .Load (), "deletedAny" , deletedAny .Load (), "resourceBinding" , resourceBindingRef )
579- return true , updateAny .Load (), deletedAny .Load (), nil
593+ return nil
580594}
581595
582596// syncApplyStrategy syncs the apply strategy specified on a ClusterResourceBinding object
@@ -853,7 +867,7 @@ const (
853867)
854868
855869// setBindingStatus sets the binding status based on the works associated with the binding.
856- func setBindingStatus (workDeleted bool , works map [string ]* fleetv1beta1.Work , resourceBinding * fleetv1beta1.ClusterResourceBinding ) {
870+ func setBindingStatus (workDeleted map [ string ] * fleetv1beta1. Work , works map [string ]* fleetv1beta1.Work , resourceBinding * fleetv1beta1.ClusterResourceBinding ) {
857871 bindingRef := klog .KObj (resourceBinding )
858872
859873 // Note (chenyu1): the work generator will refresh the status of a ClusterResourceBinding using
@@ -1015,7 +1029,7 @@ func setBindingStatus(workDeleted bool, works map[string]*fleetv1beta1.Work, res
10151029//
10161030// The Applied condition of a ClusterResourceBinding object is set to True if and only if all the
10171031// related Work objects have their Applied condition set to True.
1018- func setAllWorkAppliedCondition (workDeleted bool , works map [string ]* fleetv1beta1.Work , binding * fleetv1beta1.ClusterResourceBinding ) workConditionSummarizedStatus {
1032+ func setAllWorkAppliedCondition (workDeleted map [ string ] * fleetv1beta1. Work , works map [string ]* fleetv1beta1.Work , binding * fleetv1beta1.ClusterResourceBinding ) workConditionSummarizedStatus {
10191033 // Fleet here makes a clear distinction between incomplete, failed, and successful apply operations.
10201034 // This is to ensure that stale apply information (esp. those set before
10211035 // an apply strategy change) will not leak into the current apply operations.
@@ -1025,35 +1039,45 @@ func setAllWorkAppliedCondition(workDeleted bool, works map[string]*fleetv1beta1
10251039 var firstWorkWithIncompleteApplyOp * fleetv1beta1.Work
10261040 var firstWorkWithFailedApplyOp * fleetv1beta1.Work
10271041
1028- for _ , w := range works {
1029- applyCond := meta .FindStatusCondition (w .Status .Conditions , fleetv1beta1 .WorkConditionTypeApplied )
1030- switch {
1031- case condition .IsConditionStatusTrue (applyCond , w .GetGeneration ()):
1032- // The Work object has completed the apply op successfully.
1033- case condition .IsConditionStatusFalse (applyCond , w .GetGeneration ()):
1034- // An error has occurred during the apply op.
1035- areAllWorksApplyOpsSuccessful = false
1036- if firstWorkWithFailedApplyOp == nil {
1037- firstWorkWithFailedApplyOp = w
1038- }
1039- default :
1040- // The Work object has not yet completed the apply op.
1041- areAllWorksApplyOpsCompleted = false
1042- if firstWorkWithIncompleteApplyOp == nil {
1043- firstWorkWithIncompleteApplyOp = w
1042+ var firstWorkDeleted * fleetv1beta1.Work
1043+
1044+ for _ , w := range workDeleted {
1045+ if firstWorkDeleted == nil {
1046+ firstWorkDeleted = w
1047+ }
1048+ }
1049+
1050+ if firstWorkDeleted == nil {
1051+ for _ , w := range works {
1052+ applyCond := meta .FindStatusCondition (w .Status .Conditions , fleetv1beta1 .WorkConditionTypeApplied )
1053+ switch {
1054+ case condition .IsConditionStatusTrue (applyCond , w .GetGeneration ()):
1055+ // The Work object has completed the apply op successfully.
1056+ case condition .IsConditionStatusFalse (applyCond , w .GetGeneration ()):
1057+ // An error has occurred during the apply op.
1058+ areAllWorksApplyOpsSuccessful = false
1059+ if firstWorkWithFailedApplyOp == nil {
1060+ firstWorkWithFailedApplyOp = w
1061+ }
1062+ default :
1063+ // The Work object has not yet completed the apply op.
1064+ areAllWorksApplyOpsCompleted = false
1065+ if firstWorkWithIncompleteApplyOp == nil {
1066+ firstWorkWithIncompleteApplyOp = w
1067+ }
10441068 }
10451069 }
10461070 }
10471071
10481072 switch {
1049- case workDeleted :
1073+ case len ( workDeleted ) != 0 :
10501074 // Some work objects are being deleted.
1051- klog .V (2 ).InfoS ("Some works have not yet completed the apply op as some are deleting" , "binding" , klog .KObj (binding ))
1075+ klog .V (2 ).InfoS ("Some works have not yet completed the apply op as some are deleting" , "binding" , klog .KObj (binding ), "firstWorkDeleted" , klog . KObj ( firstWorkDeleted ) )
10521076 binding .SetConditions (metav1.Condition {
10531077 Status : metav1 .ConditionFalse ,
10541078 Type : string (fleetv1beta1 .ResourceBindingApplied ),
10551079 Reason : condition .WorkNotAppliedReason ,
1056- Message : "Some work objects have been deleted. Some works have not yet completed the apply op." ,
1080+ Message : fmt . Sprintf ( "Some work objects have been deleted. Work object %s is deleting" , firstWorkDeleted . Name ) ,
10571081 ObservedGeneration : binding .GetGeneration (),
10581082 })
10591083 return workConditionSummarizedStatusFalse
0 commit comments