
Commit 2d30829

Authored by Wei
Merge pull request #162 from cloudpilot-ai/cherry-pick-155-release-0.1
Cherry pick PR(155)/fix: calculate the overhead with the maximum value
2 parents: f6a5fd3 + 36371d9; commit: 2d30829

5 files changed: +50 -68 lines

charts/karpenter/values.yaml (1 addition, 1 deletion)

@@ -64,7 +64,7 @@ controller:
   # -- The external kubernetes cluster id for new nodes to connect with.
   clusterID: ""
   # -- The VM memory overhead as a percent that will be subtracted from the total memory for all instance types. The value of `0.075` equals to 7.5%.
-  vmMemoryOverheadPercent: 0.075
+  vmMemoryOverheadPercent: 0.065
   # -- The maximum length of a batch window. The longer this is, the more pods we can consider for provisioning at one
   # time which usually results in fewer but larger nodes.
   batchMaxDuration: 10s
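
For context on the default change above: the percent is applied multiplicatively, so the memory advertised for every instance type shrinks by the configured fraction. The sketch below shows that arithmetic only; usableMemory, memoryGiB, and overheadPercent are illustrative names, not identifiers from this repository.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// usableMemory sketches how a vmMemoryOverheadPercent-style setting is typically
// applied: subtract the overhead fraction from the instance type's total memory.
func usableMemory(memoryGiB, overheadPercent float64) *resource.Quantity {
	usableMiB := int64(memoryGiB * 1024 * (1 - overheadPercent))
	return resource.NewQuantity(usableMiB*1024*1024, resource.BinarySI)
}

func main() {
	// With the new default of 0.065, a 16 GiB instance type exposes roughly 14.96 GiB.
	fmt.Println(usableMemory(16, 0.065))
}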

pkg/operator/operator.go (1 addition, 1 deletion)

@@ -108,7 +108,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont

 	unavailableOfferingsCache := alicache.NewUnavailableOfferings()
 	instanceTypeProvider := instancetype.NewDefaultProvider(
-		*ecsClient.RegionId, ecsClient,
+		*ecsClient.RegionId, operator.GetClient(), ecsClient,
 		cache.New(alicache.InstanceTypesAndZonesTTL, alicache.DefaultCleanupInterval),
 		unavailableOfferingsCache,
 		pricingProvider, ackProvider)

pkg/operator/options/options.go (1 addition, 1 deletion)

@@ -44,7 +44,7 @@ type Options struct {
 func (o *Options) AddFlags(fs *coreoptions.FlagSet) {
 	fs.StringVar(&o.ClusterID, "cluster-id", env.WithDefaultString("CLUSTER_ID", ""), "The external kubernetes cluster id for new nodes to connect with.")
 	// TODO: for different OS, the overhead is different, find a way to fix this.
-	fs.Float64Var(&o.VMMemoryOverheadPercent, "vm-memory-overhead-percent", utils.WithDefaultFloat64("VM_MEMORY_OVERHEAD_PERCENT", 0.075), "The VM memory overhead as a percent that will be subtracted from the total memory for all instance types.")
+	fs.Float64Var(&o.VMMemoryOverheadPercent, "vm-memory-overhead-percent", utils.WithDefaultFloat64("VM_MEMORY_OVERHEAD_PERCENT", 0.065), "The VM memory overhead as a percent that will be subtracted from the total memory for all instance types.")
 	fs.BoolVar(&o.Interruption, "interruption", env.WithDefaultBool("INTERRUPTION", true), "Enable interruption handling.")
 	fs.BoolVar(&o.TelemetryShare, "telemetry-share", env.WithDefaultBool("TELEMETRY_SHARE", true), "Enable telemetry sharing.")
 	fs.IntVar(&o.APGCreationQPS, "apg-qps", int(env.WithDefaultInt64("APG_CREATION_QPS", 100)), "The QPS limit for creating AutoProvisionGroup.")

pkg/providers/instancetype/instancetype.go (42 additions, 2 deletions)

@@ -29,7 +29,9 @@ import (
 	"github.com/patrickmn/go-cache"
 	"github.com/samber/lo"
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/apimachinery/pkg/util/sets"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
 	"sigs.k8s.io/karpenter/pkg/cloudprovider"
@@ -52,6 +54,7 @@ type Provider interface {

 type DefaultProvider struct {
 	region          string
+	kubeClient      client.Client
 	ecsClient       *ecsclient.Client
 	pricingProvider pricing.Provider
 	ackProvider     ack.Provider
@@ -77,10 +80,11 @@ type DefaultProvider struct {
 	instanceTypesOfferingsSeqNum uint64
 }

-func NewDefaultProvider(region string, ecsClient *ecsclient.Client,
+func NewDefaultProvider(region string, kubeClient client.Client, ecsClient *ecsclient.Client,
 	instanceTypesCache *cache.Cache, unavailableOfferingsCache *kcache.UnavailableOfferings,
 	pricingProvider pricing.Provider, ackProvider ack.Provider) *DefaultProvider {
 	return &DefaultProvider{
+		kubeClient:      kubeClient,
 		ecsClient:       ecsClient,
 		region:          region,
 		pricingProvider: pricingProvider,
@@ -175,6 +179,11 @@ func (p *DefaultProvider) List(ctx context.Context, kc *v1alpha1.KubeletConfigur
 		return nil, fmt.Errorf("failed to get cluster CNI: %w", err)
 	}

+	nodeResourceOverhead, err := p.nodeOverhead(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get node resource overhead: %w", err)
+	}
+
 	result := lo.Map(p.instanceTypesInfo, func(i *ecsclient.DescribeInstanceTypesResponseBodyInstanceTypesInstanceType, _ int) *cloudprovider.InstanceType {
 		zoneData := lo.Map(allZones.UnsortedList(), func(zoneID string, _ int) ZoneData {
 			if !p.instanceTypesOfferings[lo.FromPtr(i.InstanceTypeId)].Has(zoneID) || !vSwitchsZones.Has(zoneID) {
@@ -194,7 +203,7 @@ func (p *DefaultProvider) List(ctx context.Context, kc *v1alpha1.KubeletConfigur
 		// so that Karpenter is able to cache the set of InstanceTypes based on values that alter the set of instance types
 		// !!! Important !!!
 		offers := p.createOfferings(ctx, *i.InstanceTypeId, zoneData)
-		return NewInstanceType(ctx, i, kc, p.region, nodeClass.Spec.SystemDisk, offers, clusterCNI)
+		return NewInstanceType(ctx, nodeResourceOverhead, i, kc, p.region, nodeClass.Spec.SystemDisk, offers, clusterCNI)
 	})

 	// Filter out nil values
@@ -204,6 +213,37 @@ func (p *DefaultProvider) List(ctx context.Context, kc *v1alpha1.KubeletConfigur
 	return result, nil
 }

+func (p *DefaultProvider) nodeOverhead(ctx context.Context) (corev1.ResourceList, error) {
+	var nodes corev1.NodeList
+	if err := p.kubeClient.List(ctx, &nodes); err != nil {
+		return corev1.ResourceList{}, err
+	}
+
+	// We are not sure how to calculate a node's overhead precisely, so use the maximum
+	// observed value to avoid node-creation loops.
+	maxCPUOverHead := int64(0)
+	maxMemoryOverHead := int64(0)
+	for _, node := range nodes.Items {
+		capacity := node.Status.Capacity
+		allocatable := node.Status.Allocatable
+
+		cpuOverHead := capacity.Cpu().MilliValue() - allocatable.Cpu().MilliValue()
+		memoryOverHead := capacity.Memory().Value() - allocatable.Memory().Value()
+
+		if cpuOverHead > maxCPUOverHead {
+			maxCPUOverHead = cpuOverHead
+		}
+		if memoryOverHead > maxMemoryOverHead {
+			maxMemoryOverHead = memoryOverHead
+		}
+	}
+
+	return corev1.ResourceList{
+		corev1.ResourceCPU:    *resource.NewMilliQuantity(maxCPUOverHead, resource.DecimalSI),
+		corev1.ResourceMemory: *resource.NewQuantity(maxMemoryOverHead, resource.DecimalSI),
+	}, nil
+}
+
 func (p *DefaultProvider) UpdateInstanceTypes(ctx context.Context) error {
 	// DO NOT REMOVE THIS LOCK ----------------------------------------------------------------------------
 	// We lock here so that multiple callers to getInstanceTypesOfferings do not result in cache misses and multiple
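
As a side note (not part of this commit), the max-overhead aggregation added above can be exercised in isolation. The standalone sketch below repeats the same logic over two hypothetical nodes with invented capacity/allocatable values, just to show that the CPU and memory maxima are taken independently across nodes.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// maxOverhead mirrors the commit's nodeOverhead logic: for CPU and memory
// independently, keep the largest capacity-minus-allocatable gap seen on any node.
func maxOverhead(nodes []corev1.Node) corev1.ResourceList {
	maxCPU, maxMem := int64(0), int64(0)
	for _, n := range nodes {
		cpu := n.Status.Capacity.Cpu().MilliValue() - n.Status.Allocatable.Cpu().MilliValue()
		mem := n.Status.Capacity.Memory().Value() - n.Status.Allocatable.Memory().Value()
		if cpu > maxCPU {
			maxCPU = cpu
		}
		if mem > maxMem {
			maxMem = mem
		}
	}
	return corev1.ResourceList{
		corev1.ResourceCPU:    *resource.NewMilliQuantity(maxCPU, resource.DecimalSI),
		corev1.ResourceMemory: *resource.NewQuantity(maxMem, resource.DecimalSI),
	}
}

func main() {
	nodes := []corev1.Node{
		// Node A: 100m CPU gap, 1536Mi memory gap.
		{Status: corev1.NodeStatus{
			Capacity:    corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("4"), corev1.ResourceMemory: resource.MustParse("8Gi")},
			Allocatable: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("3900m"), corev1.ResourceMemory: resource.MustParse("6656Mi")},
		}},
		// Node B: 150m CPU gap, 1024Mi memory gap.
		{Status: corev1.NodeStatus{
			Capacity:    corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("8"), corev1.ResourceMemory: resource.MustParse("16Gi")},
			Allocatable: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("7850m"), corev1.ResourceMemory: resource.MustParse("15Gi")},
		}},
	}
	overhead := maxOverhead(nodes)
	cpu, mem := overhead[corev1.ResourceCPU], overhead[corev1.ResourceMemory]
	// Prints "cpu=150m mem=1610612736": the CPU maximum comes from node B, the memory maximum (1536Mi) from node A.
	fmt.Printf("cpu=%s mem=%s\n", cpu.String(), mem.String())
}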

pkg/providers/instancetype/types.go (5 additions, 63 deletions)

@@ -42,9 +42,6 @@ var (
 )

 const (
-	MemoryAvailable = "memory.available"
-	NodeFSAvailable = "nodefs.available"
-
 	GiBMiBRatio              = 1024
 	MiBByteRatio             = 1024 * 1024
 	TerwayMinENIRequirements = 11
@@ -60,7 +57,7 @@ type ZoneData struct {
 	Available bool
 }

-func NewInstanceType(ctx context.Context,
+func NewInstanceType(ctx context.Context, overhead corev1.ResourceList,
 	info *ecsclient.DescribeInstanceTypesResponseBodyInstanceTypesInstanceType,
 	kc *v1alpha1.KubeletConfiguration, region string, systemDisk *v1alpha1.SystemDisk,
 	offerings cloudprovider.Offerings, clusterCNI string) *cloudprovider.InstanceType {
@@ -74,9 +71,10 @@ func NewInstanceType(ctx context.Context,
 		Offerings: offerings,
 		Capacity:  computeCapacity(ctx, info, kc.MaxPods, kc.PodsPerCore, systemDisk, clusterCNI),
 		Overhead: &cloudprovider.InstanceTypeOverhead{
-			KubeReserved:      kubeReservedResources(kc.KubeReserved),
-			SystemReserved:    systemReservedResources(kc.SystemReserved),
-			EvictionThreshold: evictionThreshold(memory(ctx, info), ephemeralStorage(systemDisk), kc.EvictionHard, kc.EvictionSoft),
+			// The fields below are merged downstream, so the entire overhead can be carried by a single field.
+			KubeReserved:      overhead,
+			SystemReserved:    corev1.ResourceList{},
+			EvictionThreshold: corev1.ResourceList{},
 		},
 	}
 	if it.Requirements.Compatible(scheduling.NewRequirements(scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpIn, string(corev1.Windows)))) == nil {
@@ -178,62 +176,6 @@ func computeCapacity(ctx context.Context,
 	return resourceList
 }

-func kubeReservedResources(kubeReserved map[string]string) corev1.ResourceList {
-	resources := corev1.ResourceList{
-		// TODO: Following data is extract from real env
-		// Please check it more
-		corev1.ResourceMemory: resource.MustParse("447Mi"),
-		corev1.ResourceCPU:    resource.MustParse("35m"),
-	}
-
-	return lo.Assign(resources, lo.MapEntries(kubeReserved, func(k string, v string) (corev1.ResourceName, resource.Quantity) {
-		return corev1.ResourceName(k), resource.MustParse(v)
-	}))
-}
-
-func systemReservedResources(systemReserved map[string]string) corev1.ResourceList {
-	resources := corev1.ResourceList{
-		// TODO: Following data is extract from real env
-		// Please check it more
-		corev1.ResourceMemory: resource.MustParse("447Mi"),
-		corev1.ResourceCPU:    resource.MustParse("35m"),
-	}
-
-	return lo.Assign(resources, lo.MapEntries(systemReserved, func(k string, v string) (corev1.ResourceName, resource.Quantity) {
-		return corev1.ResourceName(k), resource.MustParse(v)
-	}))
-}
-
-func evictionThreshold(memory *resource.Quantity, storage *resource.Quantity, evictionHard map[string]string, evictionSoft map[string]string) corev1.ResourceList {
-	overhead := corev1.ResourceList{
-		// TODO: Following data is extract from real env
-		// Please check it more
-		corev1.ResourceMemory: resource.MustParse("300Mi"),
-	}
-
-	override := corev1.ResourceList{}
-	var evictionSignals []map[string]string
-	if evictionHard != nil {
-		evictionSignals = append(evictionSignals, evictionHard)
-	}
-	if evictionSoft != nil {
-		evictionSignals = append(evictionSignals, evictionSoft)
-	}
-
-	for _, m := range evictionSignals {
-		temp := corev1.ResourceList{}
-		if v, ok := m[MemoryAvailable]; ok {
-			temp[corev1.ResourceMemory] = computeEvictionSignal(*memory, v)
-		}
-		if v, ok := m[NodeFSAvailable]; ok {
-			temp[corev1.ResourceEphemeralStorage] = computeEvictionSignal(*storage, v)
-		}
-		override = resources.MaxResources(override, temp)
-	}
-	// Assign merges maps from left to right so overrides will always be taken last
-	return lo.Assign(overhead, override)
-}
-
 // computeEvictionSignal computes the resource quantity value for an eviction signal value, computed off the
 // base capacity value if the signal value is a percentage or as a resource quantity if the signal value isn't a percentage
 func computeEvictionSignal(capacity resource.Quantity, signalValue string) resource.Quantity {
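
computeEvictionSignal itself is unchanged and its body is not part of this diff; its comment describes percentage signals being computed off the base capacity and non-percentage signals being parsed as plain quantities. The sketch below is one plausible implementation consistent with that comment, not the repository's actual code.

package main

import (
	"fmt"
	"strconv"
	"strings"

	"k8s.io/apimachinery/pkg/api/resource"
)

// computeEvictionSignalSketch: a "10%"-style signal is taken as a fraction of the
// capacity, anything else (e.g. "100Mi") is parsed directly as a resource quantity.
func computeEvictionSignalSketch(capacity resource.Quantity, signalValue string) resource.Quantity {
	if strings.HasSuffix(signalValue, "%") {
		percent, err := strconv.ParseFloat(strings.TrimSuffix(signalValue, "%"), 64)
		if err != nil {
			return resource.Quantity{}
		}
		return *resource.NewQuantity(int64(float64(capacity.Value())*percent/100), resource.DecimalSI)
	}
	return resource.MustParse(signalValue)
}

func main() {
	mem := resource.MustParse("8Gi")
	hard := computeEvictionSignalSketch(mem, "10%")   // 10% of 8Gi
	soft := computeEvictionSignalSketch(mem, "100Mi") // plain quantity
	fmt.Println(hard.String(), soft.String())
}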
