@@ -15,15 +15,14 @@ limitations under the License.
1515*/
1616
1717// Package controller features a controller to reconcile ApprovalRequest objects
18- // and create MetricCollector resources on member clusters for approved stages .
18+ // and create MetricCollectorReport resources on the hub cluster for metric collection .
1919package controller
2020
2121import (
2222 "context"
2323 "fmt"
2424 "time"
2525
26- apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
2726 "k8s.io/apimachinery/pkg/api/errors"
2827 "k8s.io/apimachinery/pkg/api/meta"
2928 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -42,15 +41,15 @@ import (
4241)
4342
4443const (
45- // metricCollectorFinalizer is the finalizer added to ApprovalRequest objects
46- metricCollectorFinalizer = "kubernetes-fleet.io/metric-collector-cleanup"
44+ // metricCollectorFinalizer is the finalizer added to ApprovalRequest objects for cleanup
45+ metricCollectorFinalizer = "kubernetes-fleet.io/metric-collector-report- cleanup"
4746
48- // prometheusURL is the default Prometheus URL to use
47+ // prometheusURL is the default Prometheus URL to use for all clusters
4948 prometheusURL = "http://prometheus.prometheus.svc.cluster.local:9090"
5049)
5150
52- // Reconciler reconciles an ApprovalRequest object and creates MetricCollector resources
53- // on member clusters when the approval is granted .
51+ // Reconciler reconciles an ApprovalRequest object and creates MetricCollectorReport resources
52+ // on the hub cluster in fleet-member-{clusterName} namespaces .
5453type Reconciler struct {
5554 client.Client
5655 recorder record.EventRecorder
@@ -182,13 +181,13 @@ func (r *Reconciler) reconcileApprovalRequestObj(ctx context.Context, approvalRe
182181
183182 klog .V (2 ).InfoS ("Found clusters in stage" , "approvalRequest" , approvalReqRef , "stage" , stageName , "clusters" , clusterNames )
184183
185- // Create or update the MetricCollector resource, CRP, and ResourceOverrides
186- if err := r .ensureMetricCollectorResources (ctx , obj , clusterNames , updateRunName , stageName ); err != nil {
187- klog .ErrorS (err , "Failed to ensure MetricCollector resources" , "approvalRequest" , approvalReqRef )
184+ // Create or update MetricCollectorReport resources in fleet-member namespaces
185+ if err := r .ensureMetricCollectorReports (ctx , obj , clusterNames , updateRunName , stageName ); err != nil {
186+ klog .ErrorS (err , "Failed to ensure MetricCollectorReport resources" , "approvalRequest" , approvalReqRef )
188187 return ctrl.Result {}, err
189188 }
190189
191- klog .V (2 ).InfoS ("Successfully ensured MetricCollector resources" , "approvalRequest" , approvalReqRef , "clusters" , clusterNames )
190+ klog .V (2 ).InfoS ("Successfully ensured MetricCollectorReport resources" , "approvalRequest" , approvalReqRef , "clusters" , clusterNames )
192191
193192 // Check workload health and approve if all workloads are healthy
194193 if err := r .checkWorkloadHealthAndApprove (ctx , approvalReqObj , clusterNames , updateRunName , stageName ); err != nil {
@@ -200,154 +199,67 @@ func (r *Reconciler) reconcileApprovalRequestObj(ctx context.Context, approvalRe
200199 return ctrl.Result {RequeueAfter : 15 * time .Second }, nil
201200}
202201
203- // ensureMetricCollectorResources creates the Namespace, MetricCollector, CRP, and ResourceOverrides
204- func (r * Reconciler ) ensureMetricCollectorResources (
202+ // ensureMetricCollectorReports creates MetricCollectorReport in each fleet-member-{clusterName} namespace
203+ func (r * Reconciler ) ensureMetricCollectorReports (
205204 ctx context.Context ,
206205 approvalReq client.Object ,
207206 clusterNames []string ,
208207 updateRunName , stageName string ,
209208) error {
210- // Generate names
211- metricCollectorName := fmt .Sprintf ("mc-%s-%s" , updateRunName , stageName )
212- crpName := fmt .Sprintf ("crp-mc-%s-%s" , updateRunName , stageName )
213- roName := fmt .Sprintf ("ro-mc-%s-%s" , updateRunName , stageName )
214-
215- // Create MetricCollector resource (cluster-scoped) on hub
216- metricCollector := & localv1alpha1.MetricCollector {
217- ObjectMeta : metav1.ObjectMeta {
218- Name : metricCollectorName ,
219- Labels : map [string ]string {
220- "app" : "metric-collector" ,
221- "approval-request" : approvalReq .GetName (),
222- "update-run" : updateRunName ,
223- "stage" : stageName ,
224- },
225- },
226- Spec : localv1alpha1.MetricCollectorSpec {
227- PrometheusURL : prometheusURL ,
228- // ReportNamespace will be overridden per cluster
229- ReportNamespace : "placeholder" ,
230- },
231- }
232-
233- // Create or update MetricCollector
234- existingMC := & localv1alpha1.MetricCollector {}
235- err := r .Client .Get (ctx , types.NamespacedName {Name : metricCollectorName }, existingMC )
236- if err != nil {
237- if errors .IsNotFound (err ) {
238- if err := r .Client .Create (ctx , metricCollector ); err != nil {
239- return fmt .Errorf ("failed to create MetricCollector: %w" , err )
240- }
241- klog .V (2 ).InfoS ("Created MetricCollector" , "metricCollector" , klog .KObj (metricCollector ))
242- } else {
243- return fmt .Errorf ("failed to get MetricCollector: %w" , err )
244- }
245- }
209+ // Generate report name (same for all clusters, different namespaces)
210+ reportName := fmt .Sprintf ("mc-%s-%s" , updateRunName , stageName )
246211
247- // Create ResourceOverride with rules for each cluster
248- overrideRules := make ([]placementv1beta1.OverrideRule , 0 , len (clusterNames ))
212+ // Create MetricCollectorReport in each fleet-member namespace
249213 for _ , clusterName := range clusterNames {
250214 reportNamespace := fmt .Sprintf (utils .NamespaceNameFormat , clusterName )
251215
252- overrideRules = append (overrideRules , placementv1beta1.OverrideRule {
253- ClusterSelector : & placementv1beta1.ClusterSelector {
254- ClusterSelectorTerms : []placementv1beta1.ClusterSelectorTerm {
255- {
256- LabelSelector : & metav1.LabelSelector {
257- MatchLabels : map [string ]string {
258- "kubernetes-fleet.io/cluster-name" : clusterName ,
259- },
260- },
261- },
262- },
263- },
264- JSONPatchOverrides : []placementv1beta1.JSONPatchOverride {
265- {
266- Operator : placementv1beta1 .JSONPatchOverrideOpReplace ,
267- Path : "/spec/reportNamespace" ,
268- Value : apiextensionsv1.JSON {Raw : []byte (fmt .Sprintf (`"%s"` , reportNamespace ))},
269- },
270- },
271- })
272- }
273-
274- // Create ClusterResourceOverride with rules for each cluster
275- clusterResourceOverride := & placementv1beta1.ClusterResourceOverride {
276- ObjectMeta : metav1.ObjectMeta {
277- Name : roName ,
278- Labels : map [string ]string {
279- "approval-request" : approvalReq .GetName (),
280- "update-run" : updateRunName ,
281- "stage" : stageName ,
282- },
283- },
284- Spec : placementv1beta1.ClusterResourceOverrideSpec {
285- ClusterResourceSelectors : []placementv1beta1.ResourceSelectorTerm {
286- {
287- Group : "metric.kubernetes-fleet.io" ,
288- Version : "v1alpha1" ,
289- Kind : "MetricCollector" ,
290- Name : metricCollectorName ,
216+ report := & localv1alpha1.MetricCollectorReport {
217+ ObjectMeta : metav1.ObjectMeta {
218+ Name : reportName ,
219+ Namespace : reportNamespace ,
220+ Labels : map [string ]string {
221+ "approval-request" : approvalReq .GetName (),
222+ "update-run" : updateRunName ,
223+ "stage" : stageName ,
224+ "cluster" : clusterName ,
291225 },
292226 },
293- Policy : & placementv1beta1. OverridePolicy {
294- OverrideRules : overrideRules ,
227+ Spec : localv1alpha1. MetricCollectorReportSpec {
228+ PrometheusURL : prometheusURL ,
295229 },
296- },
297- }
298-
299- // Create or update ClusterResourceOverride
300- existingCRO := & placementv1beta1.ClusterResourceOverride {}
301- err = r .Client .Get (ctx , types.NamespacedName {Name : roName }, existingCRO )
302- if err != nil {
303- if errors .IsNotFound (err ) {
304- if err := r .Client .Create (ctx , clusterResourceOverride ); err != nil {
305- return fmt .Errorf ("failed to create ClusterResourceOverride: %w" , err )
306- }
307- klog .V (2 ).InfoS ("Created ClusterResourceOverride" , "clusterResourceOverride" , roName )
308- } else {
309- return fmt .Errorf ("failed to get ClusterResourceOverride: %w" , err )
310230 }
311- }
312231
313- // Create ClusterResourcePlacement with PickFixed policy
314- // CRP resource selector selects the MetricCollector directly
315- crp := & placementv1beta1.ClusterResourcePlacement {
316- ObjectMeta : metav1.ObjectMeta {
317- Name : crpName ,
318- Labels : map [string ]string {
319- "approval-request" : approvalReq .GetName (),
320- "update-run" : updateRunName ,
321- "stage" : stageName ,
322- },
323- },
324- Spec : placementv1beta1.PlacementSpec {
325- ResourceSelectors : []placementv1beta1.ResourceSelectorTerm {
326- {
327- Group : "metric.kubernetes-fleet.io" ,
328- Version : "v1alpha1" ,
329- Kind : "MetricCollector" ,
330- Name : metricCollectorName ,
331- },
332- },
333- Policy : & placementv1beta1.PlacementPolicy {
334- PlacementType : placementv1beta1 .PickFixedPlacementType ,
335- ClusterNames : clusterNames ,
336- },
337- },
338- }
232+ // Create or update MetricCollectorReport
233+ existingReport := & localv1alpha1.MetricCollectorReport {}
234+ err := r .Client .Get (ctx , types.NamespacedName {
235+ Name : reportName ,
236+ Namespace : reportNamespace ,
237+ }, existingReport )
339238
340- // Create or update CRP
341- existingCRP := & placementv1beta1.ClusterResourcePlacement {}
342- err = r .Client .Get (ctx , types.NamespacedName {Name : crpName }, existingCRP )
343- if err != nil {
344- if errors .IsNotFound (err ) {
345- if err := r .Client .Create (ctx , crp ); err != nil {
346- return fmt .Errorf ("failed to create ClusterResourcePlacement: %w" , err )
239+ if err != nil {
240+ if errors .IsNotFound (err ) {
241+ if err := r .Client .Create (ctx , report ); err != nil {
242+ return fmt .Errorf ("failed to create MetricCollectorReport in %s: %w" , reportNamespace , err )
243+ }
244+ klog .V (2 ).InfoS ("Created MetricCollectorReport" ,
245+ "report" , reportName ,
246+ "namespace" , reportNamespace ,
247+ "cluster" , clusterName )
248+ } else {
249+ return fmt .Errorf ("failed to get MetricCollectorReport in %s: %w" , reportNamespace , err )
347250 }
348- klog .V (2 ).InfoS ("Created ClusterResourcePlacement" , "crp" , crpName )
349251 } else {
350- return fmt .Errorf ("failed to get ClusterResourcePlacement: %w" , err )
252+ // Update spec if needed
253+ if existingReport .Spec .PrometheusURL != prometheusURL {
254+ existingReport .Spec .PrometheusURL = prometheusURL
255+ if err := r .Client .Update (ctx , existingReport ); err != nil {
256+ return fmt .Errorf ("failed to update MetricCollectorReport in %s: %w" , reportNamespace , err )
257+ }
258+ klog .V (2 ).InfoS ("Updated MetricCollectorReport" ,
259+ "report" , reportName ,
260+ "namespace" , reportNamespace ,
261+ "cluster" , clusterName )
262+ }
351263 }
352264 }
353265
@@ -465,15 +377,15 @@ func (r *Reconciler) checkWorkloadHealthAndApprove(
465377 klog .V (2 ).InfoS ("Found MetricCollectorReport" ,
466378 "approvalRequest" , approvalReqRef ,
467379 "cluster" , clusterName ,
468- "collectedMetrics" , len (report .CollectedMetrics ),
469- "workloadsMonitored" , report .WorkloadsMonitored )
380+ "collectedMetrics" , len (report .Status . CollectedMetrics ),
381+ "workloadsMonitored" , report .Status . WorkloadsMonitored )
470382
471383 // Check if all workloads from WorkloadTracker are present and healthy
472384 for _ , trackedWorkload := range workloads {
473385 found := false
474386 healthy := false
475387
476- for _ , collectedMetric := range report .CollectedMetrics {
388+ for _ , collectedMetric := range report .Status . CollectedMetrics {
477389 if collectedMetric .Namespace == trackedWorkload .Namespace &&
478390 collectedMetric .WorkloadName == trackedWorkload .Name {
479391 found = true
@@ -562,40 +474,77 @@ func (r *Reconciler) handleDelete(ctx context.Context, approvalReqObj placementv
562474 }
563475
564476 approvalReqRef := klog .KObj (obj )
565- klog .V (2 ).InfoS ("Cleaning up resources for ApprovalRequest" , "approvalRequest" , approvalReqRef )
477+ klog .V (2 ).InfoS ("Cleaning up MetricCollectorReports for ApprovalRequest" , "approvalRequest" , approvalReqRef )
566478
567- // Delete CRP (it will cascade delete the resources on member clusters)
479+ // Get cluster names from UpdateRun to know which reports to delete
568480 spec := approvalReqObj .GetApprovalRequestSpec ()
569481 updateRunName := spec .TargetUpdateRun
570482 stageName := spec .TargetStage
571- crpName := fmt .Sprintf ("crp-mc-%s-%s" , updateRunName , stageName )
572- metricCollectorName := fmt .Sprintf ("mc-%s-%s" , updateRunName , stageName )
573- croName := fmt .Sprintf ("ro-mc-%s-%s" , updateRunName , stageName )
483+ reportName := fmt .Sprintf ("mc-%s-%s" , updateRunName , stageName )
574484
575- crp := & placementv1beta1.ClusterResourcePlacement {}
576- if err := r .Client .Get (ctx , types.NamespacedName {Name : crpName }, crp ); err == nil {
577- if err := r .Client .Delete (ctx , crp ); err != nil && ! errors .IsNotFound (err ) {
578- return ctrl.Result {}, fmt .Errorf ("failed to delete CRP: %w" , err )
485+ // Fetch UpdateRun to get cluster names
486+ var clusterNames []string
487+ if obj .GetNamespace () == "" {
488+ // Cluster-scoped: Get ClusterStagedUpdateRun
489+ updateRun := & placementv1beta1.ClusterStagedUpdateRun {}
490+ if err := r .Client .Get (ctx , types.NamespacedName {Name : updateRunName }, updateRun ); err != nil {
491+ if ! errors .IsNotFound (err ) {
492+ klog .ErrorS (err , "Failed to get ClusterStagedUpdateRun for cleanup" , "approvalRequest" , approvalReqRef )
493+ }
494+ // Continue with finalizer removal even if UpdateRun not found
495+ } else {
496+ // Find the stage
497+ for i := range updateRun .Status .StagesStatus {
498+ if updateRun .Status .StagesStatus [i ].StageName == stageName {
499+ for _ , cluster := range updateRun .Status .StagesStatus [i ].Clusters {
500+ clusterNames = append (clusterNames , cluster .ClusterName )
501+ }
502+ break
503+ }
504+ }
579505 }
580- klog .V (2 ).InfoS ("Deleted ClusterResourcePlacement" , "crp" , crpName )
581- }
582-
583- // Delete ClusterResourceOverride
584- cro := & placementv1beta1.ClusterResourceOverride {}
585- if err := r .Client .Get (ctx , types.NamespacedName {Name : croName }, cro ); err == nil {
586- if err := r .Client .Delete (ctx , cro ); err != nil && ! errors .IsNotFound (err ) {
587- return ctrl.Result {}, fmt .Errorf ("failed to delete ClusterResourceOverride: %w" , err )
506+ } else {
507+ // Namespace-scoped: Get StagedUpdateRun
508+ updateRun := & placementv1beta1.StagedUpdateRun {}
509+ if err := r .Client .Get (ctx , types.NamespacedName {Name : updateRunName , Namespace : obj .GetNamespace ()}, updateRun ); err != nil {
510+ if ! errors .IsNotFound (err ) {
511+ klog .ErrorS (err , "Failed to get StagedUpdateRun for cleanup" , "approvalRequest" , approvalReqRef )
512+ }
513+ // Continue with finalizer removal even if UpdateRun not found
514+ } else {
515+ // Find the stage
516+ for i := range updateRun .Status .StagesStatus {
517+ if updateRun .Status .StagesStatus [i ].StageName == stageName {
518+ for _ , cluster := range updateRun .Status .StagesStatus [i ].Clusters {
519+ clusterNames = append (clusterNames , cluster .ClusterName )
520+ }
521+ break
522+ }
523+ }
588524 }
589- klog .V (2 ).InfoS ("Deleted ClusterResourceOverride" , "clusterResourceOverride" , croName )
590525 }
591526
592- // Delete MetricCollector
593- metricCollector := & localv1alpha1.MetricCollector {}
594- if err := r .Client .Get (ctx , types.NamespacedName {Name : metricCollectorName }, metricCollector ); err == nil {
595- if err := r .Client .Delete (ctx , metricCollector ); err != nil && ! errors .IsNotFound (err ) {
596- return ctrl.Result {}, fmt .Errorf ("failed to delete MetricCollector: %w" , err )
527+ // Delete MetricCollectorReport from each fleet-member namespace
528+ for _ , clusterName := range clusterNames {
529+ reportNamespace := fmt .Sprintf (utils .NamespaceNameFormat , clusterName )
530+ report := & localv1alpha1.MetricCollectorReport {}
531+
532+ if err := r .Client .Get (ctx , types.NamespacedName {
533+ Name : reportName ,
534+ Namespace : reportNamespace ,
535+ }, report ); err == nil {
536+ if err := r .Client .Delete (ctx , report ); err != nil && ! errors .IsNotFound (err ) {
537+ klog .ErrorS (err , "Failed to delete MetricCollectorReport" ,
538+ "report" , reportName ,
539+ "namespace" , reportNamespace ,
540+ "cluster" , clusterName )
541+ return ctrl.Result {}, fmt .Errorf ("failed to delete MetricCollectorReport in %s: %w" , reportNamespace , err )
542+ }
543+ klog .V (2 ).InfoS ("Deleted MetricCollectorReport" ,
544+ "report" , reportName ,
545+ "namespace" , reportNamespace ,
546+ "cluster" , clusterName )
597547 }
598- klog .V (2 ).InfoS ("Deleted MetricCollector" , "metricCollector" , metricCollectorName )
599548 }
600549
601550 // Remove finalizer
@@ -605,7 +554,7 @@ func (r *Reconciler) handleDelete(ctx context.Context, approvalReqObj placementv
605554 return ctrl.Result {}, err
606555 }
607556
608- klog .V (2 ).InfoS ("Successfully cleaned up resources " , "approvalRequest" , approvalReqRef )
557+ klog .V (2 ).InfoS ("Successfully cleaned up MetricCollectorReports " , "approvalRequest" , approvalReqRef , "clusters" , clusterNames )
609558 return ctrl.Result {}, nil
610559}
611560
0 commit comments