@@ -45,23 +45,22 @@ func TestClusterCatalogUnpacking(t *testing.T) {
4545 require .Equal (ct , * managerDeployment .Spec .Replicas , managerDeployment .Status .ReadyReplicas )
4646 }, time .Minute , time .Second )
4747
48- var managerPod corev1.Pod
49- t .Log ("Waiting for only one controller-manager pod to remain" )
48+ t .Log ("Waiting for controller-manager pods to match the desired replica count" )
5049 require .EventuallyWithT (t , func (ct * assert.CollectT ) {
5150 var managerPods corev1.PodList
5251 err := c .List (ctx , & managerPods , client .MatchingLabels (managerLabelSelector ))
5352 require .NoError (ct , err )
54- require .Len (ct , managerPods .Items , 1 )
55- managerPod = managerPods .Items [0 ]
53+ require .Len (ct , managerPods .Items , int (* managerDeployment .Spec .Replicas ))
5654 }, time .Minute , time .Second )
5755
5856 t .Log ("Waiting for acquired leader election" )
5957 leaderCtx , leaderCancel := context .WithTimeout (ctx , 3 * time .Minute )
6058 defer leaderCancel ()
61- leaderSubstrings := []string {"successfully acquired lease" }
62- leaderElected , err := watchPodLogsForSubstring (leaderCtx , & managerPod , leaderSubstrings ... )
59+
60+ // When there are multiple replicas, find the leader pod
61+ managerPod , err := findLeaderPod (leaderCtx , "catalogd" )
6362 require .NoError (t , err )
64- require .True (t , leaderElected )
63+ require .NotNil (t , managerPod )
6564
6665 t .Log ("Reading logs to make sure that ClusterCatalog was reconciled by catalogdv1" )
6766 logCtx , cancel := context .WithTimeout (ctx , time .Minute )
@@ -70,7 +69,7 @@ func TestClusterCatalogUnpacking(t *testing.T) {
7069 "reconcile ending" ,
7170 fmt .Sprintf (`ClusterCatalog=%q` , testClusterCatalogName ),
7271 }
73- found , err := watchPodLogsForSubstring (logCtx , & managerPod , substrings ... )
72+ found , err := watchPodLogsForSubstring (logCtx , managerPod , substrings ... )
7473 require .NoError (t , err )
7574 require .True (t , found )
7675
@@ -103,22 +102,30 @@ func TestClusterExtensionAfterOLMUpgrade(t *testing.T) {
103102
104103 // wait for catalogd deployment to finish
105104 t .Log ("Wait for catalogd deployment to be ready" )
106- catalogdManagerPod := waitForDeployment (t , ctx , "catalogd" )
105+ _ = waitForDeployment (t , ctx , "catalogd" )
106+
107+ // Find the catalogd leader pod
108+ catalogdLeaderCtx , catalogdLeaderCancel := context .WithTimeout (ctx , 3 * time .Minute )
109+ defer catalogdLeaderCancel ()
110+ catalogdManagerPod , err := findLeaderPod (catalogdLeaderCtx , "catalogd" )
111+ require .NoError (t , err )
112+ require .NotNil (t , catalogdManagerPod )
107113
108114 // wait for operator-controller deployment to finish
109115 t .Log ("Wait for operator-controller deployment to be ready" )
110- managerPod : = waitForDeployment (t , ctx , "operator-controller" )
116+ _ = waitForDeployment (t , ctx , "operator-controller" )
111117
112118 t .Log ("Wait for acquired leader election" )
113119 // Average case is under 1 minute but in the worst case: (previous leader crashed)
114120 // we could have LeaseDuration (137s) + RetryPeriod (26s) +/- 163s
115121 leaderCtx , leaderCancel := context .WithTimeout (ctx , 3 * time .Minute )
116122 defer leaderCancel ()
117123
118- leaderSubstrings := []string {"successfully acquired lease" }
119- leaderElected , err := watchPodLogsForSubstring (leaderCtx , managerPod , leaderSubstrings ... )
124+ // When there are multiple replicas, find the leader pod
125+ var managerPod * corev1.Pod
126+ managerPod , err = findLeaderPod (leaderCtx , "operator-controller" )
120127 require .NoError (t , err )
121- require .True (t , leaderElected )
128+ require .NotNil (t , managerPod )
122129
123130 t .Log ("Reading logs to make sure that ClusterExtension was reconciled by operator-controller before we update it" )
124131 // Make sure that after we upgrade OLM itself we can still reconcile old objects without any changes
@@ -221,11 +228,48 @@ func waitForDeployment(t *testing.T, ctx context.Context, controlPlaneLabel stri
221228 t .Logf ("Ensure the number of remaining pods equal the desired number of replicas (%d)" , desiredNumReplicas )
222229 require .EventuallyWithT (t , func (ct * assert.CollectT ) {
223230 require .NoError (ct , c .List (ctx , & managerPods , client.MatchingLabelsSelector {Selector : deploymentLabelSelector }))
224- require .Len (ct , managerPods .Items , 1 )
231+ require .Len (ct , managerPods .Items , int ( desiredNumReplicas ) )
225232 }, time .Minute , time .Second )
226233 return & managerPods .Items [0 ]
227234}
228235
236+ // findLeaderPod finds the pod that has acquired the leader lease by checking logs of all pods
237+ func findLeaderPod (ctx context.Context , controlPlaneLabel string ) (* corev1.Pod , error ) {
238+ deploymentLabelSelector := labels.Set {"app.kubernetes.io/name" : controlPlaneLabel }.AsSelector ()
239+
240+ var managerPods corev1.PodList
241+ if err := c .List (ctx , & managerPods , client.MatchingLabelsSelector {Selector : deploymentLabelSelector }); err != nil {
242+ return nil , fmt .Errorf ("failed to list pods: %w" , err )
243+ }
244+
245+ if len (managerPods .Items ) == 0 {
246+ return nil , fmt .Errorf ("no pods found for label %s" , controlPlaneLabel )
247+ }
248+
249+ // If there's only one pod, it must be the leader
250+ if len (managerPods .Items ) == 1 {
251+ return & managerPods .Items [0 ], nil
252+ }
253+
254+ // Check each pod's logs for leader election message
255+ leaderSubstrings := []string {"successfully acquired lease" }
256+ for i := range managerPods .Items {
257+ pod := & managerPods .Items [i ]
258+
259+ // Check if this pod has acquired the lease
260+ isLeader , err := watchPodLogsForSubstring (ctx , pod , leaderSubstrings ... )
261+ if err != nil {
262+ // If we can't read logs from this pod, try the next one
263+ continue
264+ }
265+ if isLeader {
266+ return pod , nil
267+ }
268+ }
269+
270+ return nil , fmt .Errorf ("no leader pod found among %d pods" , len (managerPods .Items ))
271+ }
272+
229273func watchPodLogsForSubstring (ctx context.Context , pod * corev1.Pod , substrings ... string ) (bool , error ) {
230274 podLogOpts := corev1.PodLogOptions {
231275 Follow : true ,
0 commit comments