Skip to content

Commit 939a3f9

Browse files
committed
update to workapplier to use foreground deletion, add and fix tests
Signed-off-by: Britania Rodriguez Reyes <britaniar@microsoft.com>
1 parent d9e0d1b commit 939a3f9

File tree

7 files changed

+190
-22
lines changed

7 files changed

+190
-22
lines changed

pkg/controllers/workapplier/controller.go

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
417417
Kind: fleetv1beta1.AppliedWorkKind,
418418
Name: appliedWork.GetName(),
419419
UID: appliedWork.GetUID(),
420-
BlockOwnerDeletion: ptr.To(false),
420+
BlockOwnerDeletion: ptr.To(true),
421421
}
422422

423423
// Set the default values for the Work object to avoid additional validation logic in the
@@ -487,7 +487,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
487487

488488
// garbageCollectAppliedWork deletes the appliedWork and all the manifests associated with it from the cluster.
489489
func (r *Reconciler) garbageCollectAppliedWork(ctx context.Context, work *fleetv1beta1.Work) (ctrl.Result, error) {
490-
deletePolicy := metav1.DeletePropagationBackground
490+
deletePolicy := metav1.DeletePropagationForeground
491491
if !controllerutil.ContainsFinalizer(work, fleetv1beta1.WorkFinalizer) {
492492
return ctrl.Result{}, nil
493493
}
@@ -496,18 +496,22 @@ func (r *Reconciler) garbageCollectAppliedWork(ctx context.Context, work *fleetv
496496
appliedWork := fleetv1beta1.AppliedWork{
497497
ObjectMeta: metav1.ObjectMeta{Name: work.Name},
498498
}
499-
err := r.spokeClient.Delete(ctx, &appliedWork, &client.DeleteOptions{PropagationPolicy: &deletePolicy})
500-
switch {
501-
case apierrors.IsNotFound(err):
502-
klog.V(2).InfoS("The appliedWork is already deleted", "appliedWork", work.Name)
503-
case err != nil:
504-
klog.ErrorS(err, "Failed to delete the appliedWork", "appliedWork", work.Name)
499+
if err := r.spokeClient.Delete(ctx, &appliedWork, &client.DeleteOptions{PropagationPolicy: &deletePolicy}); err != nil {
500+
if apierrors.IsNotFound(err) {
501+
klog.V(2).InfoS("The appliedWork is already deleted", "appliedWork", work.Name)
502+
return r.removeWorkFinalizer(ctx, work)
503+
}
504+
klog.V(2).ErrorS(err, "Failed to delete the appliedWork", "appliedWork", work.Name)
505505
return ctrl.Result{}, controller.NewAPIServerError(false, err)
506-
default:
507-
klog.InfoS("Successfully deleted the appliedWork", "appliedWork", work.Name)
508506
}
509-
controllerutil.RemoveFinalizer(work, fleetv1beta1.WorkFinalizer)
510507

508+
klog.V(2).InfoS("Deletion for appliedWork is in progress", "appliedWork", work.Name)
509+
return ctrl.Result{Requeue: true}, nil
510+
}
511+
512+
// removeWorkFinalizer removes the finalizer from the work and updates it in the hub.
513+
func (r *Reconciler) removeWorkFinalizer(ctx context.Context, work *fleetv1beta1.Work) (ctrl.Result, error) {
514+
controllerutil.RemoveFinalizer(work, fleetv1beta1.WorkFinalizer)
511515
if err := r.hubClient.Update(ctx, work, &client.UpdateOptions{}); err != nil {
512516
klog.ErrorS(err, "Failed to remove the finalizer from the work", "work", klog.KObj(work))
513517
return ctrl.Result{}, controller.NewAPIServerError(false, err)
@@ -557,6 +561,10 @@ func (r *Reconciler) ensureAppliedWork(ctx context.Context, work *fleetv1beta1.W
557561
return nil, controller.NewAPIServerError(false, err)
558562
}
559563
}
564+
if err := r.spokeClient.Get(ctx, types.NamespacedName{Name: workRef.Name}, appliedWork); err != nil {
565+
klog.ErrorS(err, "Failed to get the appliedWork", "appliedWork", appliedWork.Name)
566+
return nil, controller.NewAPIServerError(false, err)
567+
}
560568
klog.InfoS("Recreated the appliedWork resource", "appliedWork", workRef.Name)
561569
return appliedWork, nil
562570
}

pkg/controllers/workapplier/controller_integration_test.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ func prepareAppliedWorkOwnerRef(workName string) *metav1.OwnerReference {
178178
Kind: "AppliedWork",
179179
Name: appliedWork.Name,
180180
UID: appliedWork.GetUID(),
181-
BlockOwnerDeletion: ptr.To(false),
181+
BlockOwnerDeletion: ptr.To(true),
182182
}
183183
}
184184

@@ -451,6 +451,10 @@ func cleanupWorkObject(workName string) {
451451
workRemovedActual := func() error {
452452
work := &fleetv1beta1.Work{}
453453
if err := hubClient.Get(ctx, client.ObjectKey{Name: workName, Namespace: memberReservedNSName}, work); !errors.IsNotFound(err) {
454+
if controllerutil.ContainsFinalizer(work, fleetv1beta1.WorkFinalizer) {
455+
controllerutil.RemoveFinalizer(work, fleetv1beta1.WorkFinalizer)
456+
Expect(hubClient.Update(ctx, work)).To(Succeed(), "Failed to remove the finalizer from the Work object")
457+
}
454458
return fmt.Errorf("work object still exists or an unexpected error occurred: %w", err)
455459
}
456460
return nil
@@ -459,6 +463,15 @@ func cleanupWorkObject(workName string) {
459463
}
460464

461465
func appliedWorkRemovedActual(workName string) func() error {
466+
// Retrieve the AppliedWork object.
467+
currentAppliedWork := &fleetv1beta1.AppliedWork{}
468+
if err := memberClient.Get(ctx, client.ObjectKey{Name: workName}, currentAppliedWork); err == nil {
469+
if controllerutil.ContainsFinalizer(currentAppliedWork, metav1.FinalizerDeleteDependents) {
470+
controllerutil.RemoveFinalizer(currentAppliedWork, metav1.FinalizerDeleteDependents)
471+
Expect(memberClient.Update(ctx, currentAppliedWork)).To(Succeed(), "Failed to remove the finalizer from the AppliedWork object")
472+
}
473+
}
474+
462475
return func() error {
463476
// Retrieve the AppliedWork object.
464477
appliedWork := &fleetv1beta1.AppliedWork{}

test/e2e/placement_apply_strategy_test.go

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
. "github.com/onsi/ginkgo/v2"
2424
. "github.com/onsi/gomega"
2525
corev1 "k8s.io/api/core/v1"
26+
"k8s.io/apimachinery/pkg/api/errors"
2627
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2728
"k8s.io/apimachinery/pkg/types"
2829
"k8s.io/apimachinery/pkg/util/intstr"
@@ -77,11 +78,21 @@ var _ = Describe("validating CRP when resources exists", Ordered, func() {
7778
})
7879

7980
AfterAll(func() {
81+
By("deleting created work resources on member cluster")
82+
cleanWorkResourcesOnCluster(allMemberClusters[0])
83+
8084
By(fmt.Sprintf("deleting placement %s", crpName))
8185
cleanupCRP(crpName)
8286

83-
By("deleting created work resources on member cluster")
84-
cleanWorkResourcesOnCluster(allMemberClusters[0])
87+
// Check if work is deleted. Needed to ensure that the Work resource is cleaned up before the next CRP is created.
88+
// This is because the Work resource is created with a finalizer that blocks deletion until the all applied work
89+
// and applied work itself is successfully deleted. If the Work resource is not deleted, it can cause resource overlap
90+
// and flakiness in subsequent tests.
91+
By("Check if work is deleted")
92+
Eventually(func() bool {
93+
work := &placementv1beta1.Work{}
94+
return errors.IsNotFound(hubClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-work", crpName), Namespace: fmt.Sprintf("fleet-member-%s", allMemberClusterNames[0])}, work))
95+
}, 6*eventuallyDuration, eventuallyInterval).Should(BeTrue(), "Work resource should be deleted from hub")
8596
})
8697

8798
It("should update CRP status as expected", func() {
@@ -119,7 +130,7 @@ var _ = Describe("validating CRP when resources exists", Ordered, func() {
119130
It("namespace should be kept on member cluster", func() {
120131
Consistently(func() error {
121132
ns := &corev1.Namespace{}
122-
return allMemberClusters[0].KubeClient.Get(ctx, types.NamespacedName{Name: workNamespaceName}, ns)
133+
return hubClient.Get(ctx, types.NamespacedName{Name: workNamespaceName}, ns)
123134
}, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Namespace which is not owned by the CRP should not be deleted")
124135
})
125136
})
@@ -139,6 +150,16 @@ var _ = Describe("validating CRP when resources exists", Ordered, func() {
139150
})
140151

141152
AfterAll(func() {
153+
// Check if work is deleted. Needed to ensure that the Work resource is cleaned up before the next CRP is created.
154+
// This is because the Work resource is created with a finalizer that blocks deletion until the all applied work
155+
// and applied work itself is successfully deleted. If the Work resource is not deleted, it can cause resource overlap
156+
// and flakiness in subsequent tests.
157+
By("Check if work is deleted")
158+
Eventually(func() bool {
159+
work := &placementv1beta1.Work{}
160+
return errors.IsNotFound(hubClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-work", crpName), Namespace: fmt.Sprintf("fleet-member-%s", allMemberClusterNames[0])}, work))
161+
}, 6*eventuallyDuration, eventuallyInterval).Should(BeTrue(), "Work resource should be deleted from hub")
162+
142163
By(fmt.Sprintf("deleting placement %s", crpName))
143164
cleanupCRP(crpName)
144165
})
@@ -194,6 +215,7 @@ var _ = Describe("validating CRP when resources exists", Ordered, func() {
194215
AfterAll(func() {
195216
By(fmt.Sprintf("deleting placement %s", crpName))
196217
cleanupCRP(crpName)
218+
197219
})
198220

199221
It("should update CRP status as expected", func() {
@@ -250,11 +272,21 @@ var _ = Describe("validating CRP when resources exists", Ordered, func() {
250272
})
251273

252274
AfterAll(func() {
275+
By("deleting created work resources on member cluster")
276+
cleanWorkResourcesOnCluster(allMemberClusters[0])
277+
253278
By(fmt.Sprintf("deleting placement %s", crpName))
254279
cleanupCRP(crpName)
255280

256-
By("deleting created work resources on member cluster")
257-
cleanWorkResourcesOnCluster(allMemberClusters[0])
281+
// Check if work is deleted. Needed to ensure that the Work resource is cleaned up before the next CRP is created.
282+
// This is because the Work resource is created with a finalizer that blocks deletion until the all applied work
283+
// and applied work itself is successfully deleted. If the Work resource is not deleted, it can cause resource overlap
284+
// and flakiness in subsequent tests.
285+
By("Check if work is deleted")
286+
Eventually(func() bool {
287+
work := &placementv1beta1.Work{}
288+
return errors.IsNotFound(hubClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-work", crpName), Namespace: fmt.Sprintf("fleet-member-%s", allMemberClusterNames[0])}, work))
289+
}, 6*eventuallyDuration, eventuallyInterval).Should(BeTrue(), "Work resource should be deleted from hub")
258290
})
259291

260292
It("should update CRP status as expected", func() {
@@ -385,11 +417,21 @@ var _ = Describe("validating CRP when resources exists", Ordered, func() {
385417
})
386418

387419
AfterAll(func() {
420+
By("deleting created work resources on member cluster")
421+
cleanWorkResourcesOnCluster(allMemberClusters[0])
422+
388423
By(fmt.Sprintf("deleting placement %s", crpName))
389424
cleanupCRP(crpName)
390425

391-
By("deleting created work resources on member cluster")
392-
cleanWorkResourcesOnCluster(allMemberClusters[0])
426+
// Check if work is deleted. Needed to ensure that the Work resource is cleaned up before the next CRP is created.
427+
// This is because the Work resource is created with a finalizer that blocks deletion until the all applied work
428+
// and applied work itself is successfully deleted. If the Work resource is not deleted, it can cause resource overlap
429+
// and flakiness in subsequent tests.
430+
By("Check if work is deleted")
431+
Eventually(func() bool {
432+
work := &placementv1beta1.Work{}
433+
return errors.IsNotFound(hubClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-work", crpName), Namespace: fmt.Sprintf("fleet-member-%s", allMemberClusterNames[0])}, work))
434+
}, 7*eventuallyDuration, eventuallyInterval).Should(BeTrue(), "Work resource should be deleted from hub")
393435
})
394436

395437
It("should update CRP status as expected", func() {

test/e2e/placement_pickfixed_test.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@ import (
2121

2222
. "github.com/onsi/ginkgo/v2"
2323
. "github.com/onsi/gomega"
24+
corev1 "k8s.io/api/core/v1"
2425
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2526
"k8s.io/apimachinery/pkg/types"
2627
"k8s.io/utils/ptr"
28+
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
2729

2830
placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1"
2931
"github.com/kubefleet-dev/kubefleet/test/e2e/framework"
@@ -190,4 +192,105 @@ var _ = Describe("placing resources using a CRP of PickFixed placement type", fu
190192
ensureCRPAndRelatedResourcesDeleted(crpName, []*framework.Cluster{})
191193
})
192194
})
195+
196+
Context("switch to another cluster to simulate stuck deleting works", Ordered, func() {
197+
crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess())
198+
workNamespaceName := fmt.Sprintf(workNamespaceNameTemplate, GinkgoParallelProcess())
199+
appConfigMapName := fmt.Sprintf(appConfigMapNameTemplate, GinkgoParallelProcess())
200+
var currentConfigMap corev1.ConfigMap
201+
202+
BeforeAll(func() {
203+
// Create the resources.
204+
createWorkResources()
205+
206+
// Create the CRP.
207+
crp := &placementv1beta1.ClusterResourcePlacement{
208+
ObjectMeta: metav1.ObjectMeta{
209+
Name: crpName,
210+
// Add a custom finalizer; this would allow us to better observe
211+
// the behavior of the controllers.
212+
Finalizers: []string{customDeletionBlockerFinalizer},
213+
},
214+
Spec: placementv1beta1.ClusterResourcePlacementSpec{
215+
ResourceSelectors: workResourceSelector(),
216+
Strategy: placementv1beta1.RolloutStrategy{
217+
Type: placementv1beta1.RollingUpdateRolloutStrategyType,
218+
RollingUpdate: &placementv1beta1.RollingUpdateConfig{
219+
UnavailablePeriodSeconds: ptr.To(2),
220+
},
221+
},
222+
Policy: &placementv1beta1.PlacementPolicy{
223+
PlacementType: placementv1beta1.PickFixedPlacementType,
224+
ClusterNames: []string{
225+
memberCluster1EastProdName,
226+
},
227+
},
228+
},
229+
}
230+
Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP")
231+
})
232+
233+
It("should update CRP status as expected", func() {
234+
crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), []string{memberCluster1EastProdName}, nil, "0")
235+
Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected")
236+
})
237+
238+
It("should place resources on specified clusters", func() {
239+
resourcePlacedActual := workNamespaceAndConfigMapPlacedOnClusterActual(memberCluster1EastProd)
240+
Eventually(resourcePlacedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to place resources on specified clusters")
241+
})
242+
243+
It("should add finalizer to work resources on the specified clusters", func() {
244+
Eventually(func() error {
245+
if err := memberCluster1EastProd.KubeClient.Get(ctx, types.NamespacedName{Namespace: workNamespaceName, Name: appConfigMapName}, &currentConfigMap); err != nil {
246+
return err
247+
}
248+
return nil
249+
}, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to get configmap")
250+
// Add finalizer to block deletion to simulate work stuck
251+
controllerutil.AddFinalizer(&currentConfigMap, "example.com/finalizer")
252+
Expect(memberCluster1EastProd.KubeClient.Update(ctx, &currentConfigMap)).To(Succeed(), "Failed to update configmap with finalizer")
253+
})
254+
255+
It("update crp to pick another cluster", func() {
256+
Eventually(func() error {
257+
crp := &placementv1beta1.ClusterResourcePlacement{}
258+
if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil {
259+
return err
260+
}
261+
crp.Spec.Policy.ClusterNames = []string{memberCluster2EastCanaryName}
262+
return hubClient.Update(ctx, crp)
263+
}, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP")
264+
})
265+
266+
It("should update CRP status as expected", func() {
267+
// should successfully apply to the new cluster
268+
crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), []string{memberCluster2EastCanaryName}, nil, "0")
269+
Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected")
270+
})
271+
272+
It("configmap should still exists on previously specified cluster", func() {
273+
Expect(memberCluster1EastProd.KubeClient.Get(ctx, types.NamespacedName{Namespace: workNamespaceName, Name: appConfigMapName}, &currentConfigMap)).Should(BeNil(), "ConfigMap should still exists on previously specified cluster")
274+
})
275+
276+
It("should remove finalizer from work resources on the specified clusters", func() {
277+
configMap := &corev1.ConfigMap{}
278+
Expect(memberCluster1EastProd.KubeClient.Get(ctx, types.NamespacedName{Namespace: workNamespaceName, Name: appConfigMapName}, configMap)).Should(Succeed(), "Failed to get configmap")
279+
controllerutil.RemoveFinalizer(configMap, "example.com/finalizer")
280+
Expect(memberCluster1EastProd.KubeClient.Update(ctx, configMap)).To(Succeed(), "Failed to update configmap with finalizer")
281+
})
282+
283+
It("should remove resources from previously specified clusters", func() {
284+
checkIfRemovedWorkResourcesFromMemberClusters([]*framework.Cluster{memberCluster1EastProd})
285+
})
286+
287+
It("should update CRP status as expected", func() {
288+
crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), []string{memberCluster2EastCanaryName}, nil, "0")
289+
Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected")
290+
})
291+
292+
AfterAll(func() {
293+
ensureCRPAndRelatedResourcesDeleted(crpName, []*framework.Cluster{memberCluster1EastProd, memberCluster2EastCanary})
294+
})
295+
})
193296
})

test/e2e/resources/test-job.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ apiVersion: batch/v1
22
kind: Job
33
metadata:
44
name: pi
5+
namespace: test-ns
56
spec:
7+
suspend: true
68
template:
79
spec:
810
containers:

test/e2e/rollout_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -718,8 +718,8 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() {
718718
})
719719
// job is not trackable, so we need to wait for a bit longer for each roll out
720720
It("should update CRP status as expected", func() {
721-
crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantSelectedResources, allMemberClusterNames, nil, "1", false)
722-
Eventually(crpStatusUpdatedActual, 5*time.Duration(unAvailablePeriodSeconds)*time.Second, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected")
721+
crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantSelectedResources, allMemberClusterNames, nil, "0", false)
722+
Eventually(crpStatusUpdatedActual, 6*time.Duration(unAvailablePeriodSeconds)*time.Second, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected")
723723
})
724724

725725
AfterAll(func() {

test/e2e/utils_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1265,7 +1265,7 @@ func buildOwnerReference(cluster *framework.Cluster, crpName string) *metav1.Own
12651265
Kind: "AppliedWork",
12661266
Name: workName,
12671267
UID: appliedWork.UID,
1268-
BlockOwnerDeletion: ptr.To(false),
1268+
BlockOwnerDeletion: ptr.To(true),
12691269
}
12701270
}
12711271

0 commit comments

Comments
 (0)