@@ -24,6 +24,7 @@ import (
2424 "errors"
2525 "fmt"
2626 "net/http"
27+ "sort"
2728 "strings"
2829 "sync"
2930
@@ -105,6 +106,51 @@ func (p *DefaultProvider) GetClusterCNI(_ context.Context) (string, error) {
105106 return p .clusterCNI , nil
106107}
107108
109+ // Get the ID of the target nodepool id when DescribeClusterAttachScriptsRequest.
110+ // If there is no default nodepool, select the nodepool with the most HealthyNodes.
111+ //
112+ //nolint:gocyclo
113+ func (p * DefaultProvider ) getTargetNodePoolID (ctx context.Context ) (* string , error ) {
114+ resp , err := p .ackClient .DescribeClusterNodePools (tea .String (p .clusterID ), & ackclient.DescribeClusterNodePoolsRequest {})
115+ if err != nil {
116+ log .FromContext (ctx ).Error (err , "Failed to describe cluster nodepools" )
117+ return nil , err
118+ }
119+ if resp == nil || resp .Body == nil || resp .Body .Nodepools == nil {
120+ return nil , fmt .Errorf ("empty describe cluster nodepools response" )
121+ }
122+ if len (resp .Body .Nodepools ) == 0 {
123+ return nil , fmt .Errorf ("no nodepool found" )
124+ }
125+
126+ nodepools := resp .Body .Nodepools
127+ sort .Slice (nodepools , func (i , j int ) bool {
128+ if nodepools [i ].NodepoolInfo == nil || nodepools [j ].NodepoolInfo == nil {
129+ return false
130+ }
131+
132+ if nodepools [i ].NodepoolInfo .IsDefault != nil && nodepools [j ].NodepoolInfo .IsDefault != nil {
133+ if * nodepools [i ].NodepoolInfo .IsDefault && ! * nodepools [j ].NodepoolInfo .IsDefault {
134+ return true
135+ }
136+ if ! * nodepools [i ].NodepoolInfo .IsDefault && * nodepools [j ].NodepoolInfo .IsDefault {
137+ return false
138+ }
139+ }
140+
141+ if nodepools [i ].Status == nil || nodepools [j ].Status == nil || nodepools [i ].Status .HealthyNodes == nil || nodepools [j ].Status .HealthyNodes == nil {
142+ return false
143+ }
144+ return * nodepools [i ].Status .HealthyNodes > * nodepools [j ].Status .HealthyNodes
145+ })
146+
147+ targetNodepool := nodepools [0 ]
148+ if targetNodepool .NodepoolInfo == nil {
149+ return nil , fmt .Errorf ("target describe cluster nodepool is empty" )
150+ }
151+ return targetNodepool .NodepoolInfo .NodepoolId , nil
152+ }
153+
108154func (p * DefaultProvider ) GetNodeRegisterScript (ctx context.Context ,
109155 capacityType string ,
110156 nodeClaim * karpv1.NodeClaim ,
@@ -114,8 +160,19 @@ func (p *DefaultProvider) GetNodeRegisterScript(ctx context.Context,
114160 return p .resolveUserData (cachedScript .(string ), labels , nodeClaim , kubeletCfg ), nil
115161 }
116162
163+ nodepoolID , err := p .getTargetNodePoolID (ctx )
164+ if err != nil {
165+ // Don't return here: we can still proceed when no target nodepool ID could be determined.
166+ // We need to try to obtain a usable nodepool ID in order to get the cluster attach scripts.
167+ // One known scenario is on an ACK cluster with version 1.24, where the user deleted the default nodepool and
168+ // created a nodepool with a containerd runtime. The DescribeClusterAttachScriptsRequest api will use the
169+ // CRI configuration of the deleted default nodepool, which might be using the Docker runtime.
170+ // This could result in nodes failing to register to the new cluster.
171+ log .FromContext (ctx ).Error (err , "Failed to get default nodepool id" )
172+ }
117173 reqPara := & ackclient.DescribeClusterAttachScriptsRequest {
118174 KeepInstanceName : tea .Bool (true ),
175+ NodepoolId : nodepoolID ,
119176 }
120177 resp , err := p .ackClient .DescribeClusterAttachScripts (tea .String (p .clusterID ), reqPara )
121178 if err != nil {
0 commit comments