Skip to content

Commit 0650598

Browse files
authored
feat: add status metrics for trafficManagerBackend (#291)
1 parent 1960b9d commit 0650598

File tree

5 files changed

+508
-75
lines changed

5 files changed

+508
-75
lines changed

go.mod

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,40 +2,23 @@ module go.goms.io/fleet-networking
22

33
go 1.23.6
44

5-
require (
6-
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0
7-
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.0
8-
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4 v4.3.0
9-
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/trafficmanager/armtrafficmanager v1.3.0
10-
github.com/google/go-cmp v0.6.0
11-
github.com/onsi/ginkgo/v2 v2.21.0
12-
github.com/onsi/gomega v1.35.1
13-
github.com/prometheus/client_golang v1.19.1
14-
github.com/prometheus/common v0.55.0
15-
github.com/stretchr/testify v1.10.0
16-
golang.org/x/sync v0.12.0
17-
k8s.io/api v0.31.1
18-
k8s.io/apimachinery v0.31.1
19-
k8s.io/client-go v0.31.1
20-
k8s.io/klog/v2 v2.130.1
21-
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
22-
sigs.k8s.io/cloud-provider-azure/pkg/azclient v0.0.50
23-
sigs.k8s.io/controller-runtime v0.19.0
24-
)
25-
265
require go.goms.io/fleet v0.14.0
276

287
require (
298
github.com/Azure/azure-sdk-for-go v68.0.0+incompatible // indirect
9+
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0
10+
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.0
3011
github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect
3112
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2 v2.2.0 // indirect
3213
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.7.0 // indirect
3314
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerregistry/armcontainerregistry v1.2.0 // indirect
3415
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.8.0 // indirect
3516
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/keyvault/armkeyvault v1.4.0 // indirect
17+
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4 v4.3.0
3618
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/privatedns/armprivatedns v1.3.0 // indirect
3719
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0 // indirect
3820
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.6.0 // indirect
21+
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/trafficmanager/armtrafficmanager v1.3.0
3922
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.3.0 // indirect
4023
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.0 // indirect
4124
github.com/Azure/go-autorest v14.2.0+incompatible // indirect
@@ -66,6 +49,7 @@ require (
6649
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
6750
github.com/golang/protobuf v1.5.4 // indirect
6851
github.com/google/gnostic-models v0.6.8 // indirect
52+
github.com/google/go-cmp v0.6.0
6953
github.com/google/gofuzz v1.2.0 // indirect
7054
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect
7155
github.com/google/uuid v1.6.0 // indirect
@@ -77,13 +61,18 @@ require (
7761
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
7862
github.com/modern-go/reflect2 v1.0.2 // indirect
7963
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
64+
github.com/onsi/ginkgo/v2 v2.21.0
65+
github.com/onsi/gomega v1.35.1
8066
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
8167
github.com/pkg/errors v0.9.1 // indirect
8268
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
83-
github.com/prometheus/client_model v0.6.1 // indirect
69+
github.com/prometheus/client_golang v1.19.1
70+
github.com/prometheus/client_model v0.6.1
71+
github.com/prometheus/common v0.55.0
8472
github.com/prometheus/procfs v0.15.1 // indirect
8573
github.com/rogpeppe/go-internal v1.13.1 // indirect
8674
github.com/spf13/pflag v1.0.5 // indirect
75+
github.com/stretchr/testify v1.10.0
8776
github.com/x448/float16 v0.8.4 // indirect
8877
go.opentelemetry.io/otel v1.31.0 // indirect
8978
go.opentelemetry.io/otel/metric v1.31.0 // indirect
@@ -93,6 +82,7 @@ require (
9382
golang.org/x/exp v0.0.0-20241004190924-225e2abe05e6 // indirect
9483
golang.org/x/net v0.38.0 // indirect
9584
golang.org/x/oauth2 v0.23.0 // indirect
85+
golang.org/x/sync v0.12.0
9686
golang.org/x/sys v0.31.0 // indirect
9787
golang.org/x/term v0.30.0 // indirect
9888
golang.org/x/text v0.23.0 // indirect
@@ -104,10 +94,17 @@ require (
10494
gopkg.in/inf.v0 v0.9.1 // indirect
10595
gopkg.in/yaml.v2 v2.4.0 // indirect
10696
gopkg.in/yaml.v3 v3.0.1 // indirect
97+
k8s.io/api v0.31.1
10798
k8s.io/apiextensions-apiserver v0.31.1 // indirect
99+
k8s.io/apimachinery v0.31.1
100+
k8s.io/client-go v0.31.1
101+
k8s.io/klog/v2 v2.130.1
108102
k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 // indirect
109103
k8s.io/metrics v0.25.2 // indirect
104+
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
110105
sigs.k8s.io/cloud-provider-azure v1.28.2 // indirect
106+
sigs.k8s.io/cloud-provider-azure/pkg/azclient v0.0.50
107+
sigs.k8s.io/controller-runtime v0.19.0
111108
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
112109
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
113110
sigs.k8s.io/work-api v0.0.0-20220407021756-586d707fdb2c // indirect

pkg/controllers/hub/trafficmanagerbackend/controller.go

Lines changed: 65 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@ import (
1111
"errors"
1212
"fmt"
1313
"math"
14+
"strconv"
1415
"strings"
1516
"time"
1617

1718
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
1819
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/trafficmanager/armtrafficmanager"
20+
"github.com/prometheus/client_golang/prometheus"
1921
"golang.org/x/sync/errgroup"
2022
corev1 "k8s.io/api/core/v1"
2123
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -37,10 +39,16 @@ import (
3739
fleetnetv1beta1 "go.goms.io/fleet-networking/api/v1beta1"
3840
"go.goms.io/fleet-networking/pkg/common/azureerrors"
3941
"go.goms.io/fleet-networking/pkg/common/defaulter"
42+
"go.goms.io/fleet-networking/pkg/common/metrics"
4043
"go.goms.io/fleet-networking/pkg/common/objectmeta"
4144
"go.goms.io/fleet-networking/pkg/controllers/hub/trafficmanagerprofile"
4245
)
4346

47+
func init() {
48+
// Register the custom metrics
49+
prometheus.MustRegister(trafficManagerBackendStatusLastTimestampSeconds)
50+
}
51+
4452
const (
4553
trafficManagerBackendProfileFieldKey = ".spec.profile.name"
4654
trafficManagerBackendBackendFieldKey = ".spec.backend.name"
@@ -70,6 +78,15 @@ var (
7078
generateAzureTrafficManagerEndpointNamePrefixFunc = func(backend *fleetnetv1beta1.TrafficManagerBackend) string {
7179
return fmt.Sprintf(AzureResourceEndpointNamePrefix, backend.UID)
7280
}
81+
82+
// trafficManagerBackendStatusLastTimestampSeconds is a prometheus metric that holds the last update timestamp of
83+
// traffic manager backend status in seconds.
84+
trafficManagerBackendStatusLastTimestampSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{
85+
Namespace: metrics.MetricsNamespace,
86+
Subsystem: metrics.MetricsSubsystem,
87+
Name: "traffic_manager_backend_status_last_timestamp_seconds",
88+
Help: "Last update timestamp of traffic manager backend status in seconds",
89+
}, []string{"namespace", "name", "generation", "condition", "status", "reason"})
7390
)
7491

7592
// Reconciler reconciles a trafficManagerBackend object.
@@ -113,30 +130,54 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco
113130
return r.handleDelete(ctx, backend)
114131
}
115132

133+
// register metrics finalizer
134+
if !controllerutil.ContainsFinalizer(backend, objectmeta.MetricsFinalizer) {
135+
controllerutil.AddFinalizer(backend, objectmeta.MetricsFinalizer)
136+
if err := r.Update(ctx, backend); err != nil {
137+
klog.ErrorS(err, "Failed to add trafficManagerBackend metrics finalizer", "trafficManagerBackend", backendKRef)
138+
return ctrl.Result{}, err
139+
}
140+
}
141+
142+
defer emitTrafficManagerBackendStatusMetric(backend)
143+
116144
// TODO: replace the following with defaulter webhook
117145
defaulter.SetDefaultsTrafficManagerBackend(backend)
118146
return r.handleUpdate(ctx, backend)
119147
}
120148

121149
func (r *Reconciler) handleDelete(ctx context.Context, backend *fleetnetv1beta1.TrafficManagerBackend) (ctrl.Result, error) {
122150
backendKObj := klog.KObj(backend)
151+
needUpdate := false
123152
// The backend is being deleted
124-
if !controllerutil.ContainsFinalizer(backend, objectmeta.TrafficManagerBackendFinalizer) {
125-
klog.V(2).InfoS("TrafficManagerBackend is being deleted", "trafficManagerBackend", backendKObj)
126-
return ctrl.Result{}, nil
153+
if controllerutil.ContainsFinalizer(backend, objectmeta.MetricsFinalizer) {
154+
klog.V(2).InfoS("TrafficManagerBackend is being deleted and cleaning up its metrics", "trafficManagerBackend", backendKObj)
155+
// The controller registers the backend finalizer only before creating the ATM backend, so that deletion does not get stuck on the 403 error.
156+
// We use a separate finalizer to clean up the metrics for the backend.
157+
trafficManagerBackendStatusLastTimestampSeconds.DeletePartialMatch(prometheus.Labels{"namespace": backend.GetNamespace(), "name": backend.GetName()})
158+
controllerutil.RemoveFinalizer(backend, objectmeta.MetricsFinalizer)
159+
needUpdate = true
160+
}
161+
162+
if controllerutil.ContainsFinalizer(backend, objectmeta.TrafficManagerBackendFinalizer) {
163+
if err := r.deleteAzureTrafficManagerEndpoints(ctx, backend); err != nil {
164+
klog.ErrorS(err, "Failed to delete Azure Traffic Manager endpoints", "trafficManagerBackend", backendKObj)
165+
return ctrl.Result{}, err
166+
}
167+
controllerutil.RemoveFinalizer(backend, objectmeta.TrafficManagerBackendFinalizer)
168+
needUpdate = true
127169
}
128170

129-
if err := r.deleteAzureTrafficManagerEndpoints(ctx, backend); err != nil {
130-
klog.ErrorS(err, "Failed to delete Azure Traffic Manager endpoints", "trafficManagerBackend", backendKObj)
131-
return ctrl.Result{}, err
171+
if !needUpdate {
172+
klog.V(2).InfoS("No need to remove finalizer", "trafficManagerBackend", backendKObj)
173+
return ctrl.Result{}, nil
132174
}
133175

134-
controllerutil.RemoveFinalizer(backend, objectmeta.TrafficManagerBackendFinalizer)
135176
if err := r.Client.Update(ctx, backend); err != nil {
136-
klog.ErrorS(err, "Failed to remove trafficManagerBackend finalizer", "trafficManagerBackend", backendKObj)
177+
klog.ErrorS(err, "Failed to remove trafficManagerBackend finalizers", "trafficManagerBackend", backendKObj)
137178
return ctrl.Result{}, controller.NewUpdateIgnoreConflictError(err)
138179
}
139-
klog.V(2).InfoS("Removed trafficManagerBackend finalizer", "trafficManagerBackend", backendKObj)
180+
klog.V(2).InfoS("Removed trafficManagerBackend finalizers", "trafficManagerBackend", backendKObj)
140181
return ctrl.Result{}, nil
141182
}
142183

@@ -811,3 +852,18 @@ func (r *Reconciler) internalServiceExportEventHandler() handler.MapFunc {
811852
return []reconcile.Request{}
812853
}
813854
}
855+
856+
// emitTrafficManagerBackendStatusMetric emits the traffic manager backend status metric based on status conditions.
857+
func emitTrafficManagerBackendStatusMetric(backend *fleetnetv1beta1.TrafficManagerBackend) {
858+
generation := backend.Generation
859+
genStr := strconv.FormatInt(generation, 10)
860+
861+
cond := meta.FindStatusCondition(backend.Status.Conditions, string(fleetnetv1beta1.TrafficManagerBackendConditionAccepted))
862+
if cond != nil && cond.ObservedGeneration == generation {
863+
trafficManagerBackendStatusLastTimestampSeconds.WithLabelValues(backend.GetNamespace(), backend.GetName(), genStr,
864+
string(fleetnetv1beta1.TrafficManagerBackendConditionAccepted), string(cond.Status), cond.Reason).SetToCurrentTime()
865+
return
866+
}
867+
// We should rarely reach here; it can only happen when updating the status fails.
868+
klog.V(2).InfoS("There's no accepted status condition on trafficManagerBackend, status updating failed possibly", "trafficManagerBackend", klog.KObj(backend))
869+
}

0 commit comments

Comments
 (0)