@@ -11,11 +11,13 @@ import (
1111 "errors"
1212 "fmt"
1313 "math"
14+ "strconv"
1415 "strings"
1516 "time"
1617
1718 "github.com/Azure/azure-sdk-for-go/sdk/azcore"
1819 "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/trafficmanager/armtrafficmanager"
20+ "github.com/prometheus/client_golang/prometheus"
1921 "golang.org/x/sync/errgroup"
2022 corev1 "k8s.io/api/core/v1"
2123 apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -37,10 +39,16 @@ import (
3739 fleetnetv1beta1 "go.goms.io/fleet-networking/api/v1beta1"
3840 "go.goms.io/fleet-networking/pkg/common/azureerrors"
3941 "go.goms.io/fleet-networking/pkg/common/defaulter"
42+ "go.goms.io/fleet-networking/pkg/common/metrics"
4043 "go.goms.io/fleet-networking/pkg/common/objectmeta"
4144 "go.goms.io/fleet-networking/pkg/controllers/hub/trafficmanagerprofile"
4245)
4346
47+ func init () {
48+ // Register the custom metrics
49+ prometheus .MustRegister (trafficManagerBackendStatusLastTimestampSeconds )
50+ }
51+
4452const (
4553 trafficManagerBackendProfileFieldKey = ".spec.profile.name"
4654 trafficManagerBackendBackendFieldKey = ".spec.backend.name"
7078 generateAzureTrafficManagerEndpointNamePrefixFunc = func (backend * fleetnetv1beta1.TrafficManagerBackend ) string {
7179 return fmt .Sprintf (AzureResourceEndpointNamePrefix , backend .UID )
7280 }
81+
82+ // trafficManagerBackendStatusLastTimestampSeconds is a prometheus metric that holds the last update timestamp of
83+ // traffic manager backend status in seconds.
84+ trafficManagerBackendStatusLastTimestampSeconds = prometheus .NewGaugeVec (prometheus.GaugeOpts {
85+ Namespace : metrics .MetricsNamespace ,
86+ Subsystem : metrics .MetricsSubsystem ,
87+ Name : "traffic_manager_backend_status_last_timestamp_seconds" ,
88+ Help : "Last update timestamp of traffic manager backend status in seconds" ,
89+ }, []string {"namespace" , "name" , "generation" , "condition" , "status" , "reason" })
7390)
7491
7592// Reconciler reconciles a trafficManagerBackend object.
@@ -113,30 +130,54 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco
113130 return r .handleDelete (ctx , backend )
114131 }
115132
133+ // register metrics finalizer
134+ if ! controllerutil .ContainsFinalizer (backend , objectmeta .MetricsFinalizer ) {
135+ controllerutil .AddFinalizer (backend , objectmeta .MetricsFinalizer )
136+ if err := r .Update (ctx , backend ); err != nil {
137+ klog .ErrorS (err , "Failed to add trafficManagerBackend metrics finalizer" , "trafficManagerBackend" , backendKRef )
138+ return ctrl.Result {}, err
139+ }
140+ }
141+
142+ defer emitTrafficManagerBackendStatusMetric (backend )
143+
116144 // TODO: replace the following with defaulter webhook
117145 defaulter .SetDefaultsTrafficManagerBackend (backend )
118146 return r .handleUpdate (ctx , backend )
119147}
120148
121149func (r * Reconciler ) handleDelete (ctx context.Context , backend * fleetnetv1beta1.TrafficManagerBackend ) (ctrl.Result , error ) {
122150 backendKObj := klog .KObj (backend )
151+ needUpdate := false
123152 // The backend is being deleted
124- if ! controllerutil .ContainsFinalizer (backend , objectmeta .TrafficManagerBackendFinalizer ) {
125- klog .V (2 ).InfoS ("TrafficManagerBackend is being deleted" , "trafficManagerBackend" , backendKObj )
126- return ctrl.Result {}, nil
153+ if controllerutil .ContainsFinalizer (backend , objectmeta .MetricsFinalizer ) {
154+ klog .V (2 ).InfoS ("TrafficManagerBackend is being deleted and cleaning up its metrics" , "trafficManagerBackend" , backendKObj )
155+ // The controller registers backend finalizer only before creating atm backend to avoid the deletion stuck for the 403 error.
156+ // We use a separate finalizer to clean up the metrics for the backend.
157+ trafficManagerBackendStatusLastTimestampSeconds .DeletePartialMatch (prometheus.Labels {"namespace" : backend .GetNamespace (), "name" : backend .GetName ()})
158+ controllerutil .RemoveFinalizer (backend , objectmeta .MetricsFinalizer )
159+ needUpdate = true
160+ }
161+
162+ if controllerutil .ContainsFinalizer (backend , objectmeta .TrafficManagerBackendFinalizer ) {
163+ if err := r .deleteAzureTrafficManagerEndpoints (ctx , backend ); err != nil {
164+ klog .ErrorS (err , "Failed to delete Azure Traffic Manager endpoints" , "trafficManagerBackend" , backendKObj )
165+ return ctrl.Result {}, err
166+ }
167+ controllerutil .RemoveFinalizer (backend , objectmeta .TrafficManagerBackendFinalizer )
168+ needUpdate = true
127169 }
128170
129- if err := r . deleteAzureTrafficManagerEndpoints ( ctx , backend ); err != nil {
130- klog .ErrorS ( err , "Failed to delete Azure Traffic Manager endpoints " , "trafficManagerBackend" , backendKObj )
131- return ctrl.Result {}, err
171+ if ! needUpdate {
172+ klog .V ( 2 ). InfoS ( "No need to remove finalizer " , "trafficManagerBackend" , backendKObj )
173+ return ctrl.Result {}, nil
132174 }
133175
134- controllerutil .RemoveFinalizer (backend , objectmeta .TrafficManagerBackendFinalizer )
135176 if err := r .Client .Update (ctx , backend ); err != nil {
136- klog .ErrorS (err , "Failed to remove trafficManagerBackend finalizer " , "trafficManagerBackend" , backendKObj )
177+ klog .ErrorS (err , "Failed to remove trafficManagerBackend finalizers " , "trafficManagerBackend" , backendKObj )
137178 return ctrl.Result {}, controller .NewUpdateIgnoreConflictError (err )
138179 }
139- klog .V (2 ).InfoS ("Removed trafficManagerBackend finalizer " , "trafficManagerBackend" , backendKObj )
180+ klog .V (2 ).InfoS ("Removed trafficManagerBackend finalizers " , "trafficManagerBackend" , backendKObj )
140181 return ctrl.Result {}, nil
141182}
142183
@@ -811,3 +852,18 @@ func (r *Reconciler) internalServiceExportEventHandler() handler.MapFunc {
811852 return []reconcile.Request {}
812853 }
813854}
855+
856+ // emitTrafficManagerBackendStatusMetric emits the traffic manager backend status metric based on status conditions.
857+ func emitTrafficManagerBackendStatusMetric (backend * fleetnetv1beta1.TrafficManagerBackend ) {
858+ generation := backend .Generation
859+ genStr := strconv .FormatInt (generation , 10 )
860+
861+ cond := meta .FindStatusCondition (backend .Status .Conditions , string (fleetnetv1beta1 .TrafficManagerBackendConditionAccepted ))
862+ if cond != nil && cond .ObservedGeneration == generation {
863+ trafficManagerBackendStatusLastTimestampSeconds .WithLabelValues (backend .GetNamespace (), backend .GetName (), genStr ,
864+ string (fleetnetv1beta1 .TrafficManagerBackendConditionAccepted ), string (cond .Status ), cond .Reason ).SetToCurrentTime ()
865+ return
866+ }
867+ // We should rarely reach here, it can only happen when updating status fails.
868+ klog .V (2 ).InfoS ("There's no accepted status condition on trafficManagerBackend, status updating failed possibly" , "trafficManagerBackend" , klog .KObj (backend ))
869+ }
0 commit comments