feat(controller): ConfigMap image mapping override #169
@@ -33,6 +33,7 @@ import (

    "github.com/go-logr/logr"
    "github.com/google/go-cmp/cmp"
    "github.com/google/go-containerregistry/pkg/name"
    llamav1alpha1 "github.com/llamastack/llama-stack-k8s-operator/api/v1alpha1"
    "github.com/llamastack/llama-stack-k8s-operator/pkg/cluster"
    "github.com/llamastack/llama-stack-k8s-operator/pkg/deploy"

@@ -83,11 +84,17 @@ const (

// When a ConfigMap's data changes, it automatically triggers reconciliation of the referencing
// LlamaStackDistribution, which recalculates a content-based hash and updates the deployment's
// pod template annotations. This causes Kubernetes to restart the pods with the updated configuration.
//
// Operator ConfigMap Watching Feature:
// This reconciler also watches for changes to the operator configuration ConfigMap. When the operator
// config changes, it triggers reconciliation of all LlamaStackDistribution resources.
type LlamaStackDistributionReconciler struct {
    client.Client
    Scheme *runtime.Scheme
    // Feature flags
    EnableNetworkPolicy bool
    // Image mapping overrides
    ImageMappingOverrides map[string]string
    // Cluster info
    ClusterInfo *cluster.ClusterInfo
    httpClient *http.Client

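For context, here is a minimal sketch of how such a content-based hash could be computed and stamped onto the pod template. The helper name, annotation key, and package name are illustrative; the operator's actual implementation may differ.

```go
package controllers // package name assumed for illustration

import (
    "crypto/sha256"
    "encoding/hex"
    "sort"
)

// configMapContentHash hashes ConfigMap data deterministically, so any change in
// a key or value produces a new annotation value and Kubernetes rolls the pods.
func configMapContentHash(data map[string]string) string {
    keys := make([]string, 0, len(data))
    for k := range data {
        keys = append(keys, k)
    }
    sort.Strings(keys) // stable ordering: identical data always hashes the same

    h := sha256.New()
    for _, k := range keys {
        h.Write([]byte(k))
        h.Write([]byte{0}) // separator so key/value boundaries are unambiguous
        h.Write([]byte(data[k]))
        h.Write([]byte{0})
    }
    return hex.EncodeToString(h.Sum(nil))
}

// Hypothetical use when building the Deployment's pod template:
//   podTemplate.Annotations["llamastack.io/configmap-hash"] = configMapContentHash(cm.Data)
```
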
@@ -593,6 +600,40 @@ func (r *LlamaStackDistributionReconciler) configMapUpdatePredicate(e event.Upda

        return false
    }

    // Check if this is the operator config ConfigMap
    if r.handleOperatorConfigUpdate(newConfigMap) {
        return true
    }

    // Handle referenced ConfigMap updates
    return r.handleReferencedConfigMapUpdate(oldConfigMap, newConfigMap)
}

// handleOperatorConfigUpdate processes updates to the operator config ConfigMap.
func (r *LlamaStackDistributionReconciler) handleOperatorConfigUpdate(configMap *corev1.ConfigMap) bool {
    operatorNamespace, err := deploy.GetOperatorNamespace()
    if err != nil {
        return false
    }

    if configMap.Name != operatorConfigData || configMap.Namespace != operatorNamespace {
        return false
    }

    // Update feature flags
    EnableNetworkPolicy, err := parseFeatureFlags(configMap.Data)

Collaborator
Why is network policy enabled/disabled via a feature flag? Feature flags are for enabling or disabling experimental features that eventually become permanent (or are dropped). To me, the use of NetworkPolicies looks like an admin decision, so it should be part of the regular configuration. A feature flag could control whether that configuration has any effect, but the overall network-policy on/off switch should be part of the LLSD.
    if err != nil {
        log.FromContext(context.Background()).Error(err, "Failed to parse feature flags")
    } else {
        r.EnableNetworkPolicy = EnableNetworkPolicy
    }

    r.ImageMappingOverrides = ParseImageMappingOverrides(context.Background(), configMap.Data)
    return true
}

// handleReferencedConfigMapUpdate processes updates to referenced ConfigMaps.
func (r *LlamaStackDistributionReconciler) handleReferencedConfigMapUpdate(oldConfigMap, newConfigMap *corev1.ConfigMap) bool {
    // Only proceed if this ConfigMap is referenced by any LlamaStackDistribution
    if !r.isConfigMapReferenced(newConfigMap) {
        return false

@@ -758,6 +799,27 @@ func (r *LlamaStackDistributionReconciler) manuallyCheckConfigMapReference(confi

// findLlamaStackDistributionsForConfigMap maps ConfigMap changes to LlamaStackDistribution reconcile requests.
func (r *LlamaStackDistributionReconciler) findLlamaStackDistributionsForConfigMap(ctx context.Context, configMap client.Object) []reconcile.Request {
    logger := log.FromContext(ctx).WithValues(
        "configMapName", configMap.GetName(),
        "configMapNamespace", configMap.GetNamespace())

    operatorNamespace, err := deploy.GetOperatorNamespace()
    if err != nil {
        logger.Error(err, "Failed to get operator namespace for config map event processing")
        return nil
    }
    // If the operator config was changed, we reconcile all LlamaStackDistributions
    if configMap.GetName() == operatorConfigData && configMap.GetNamespace() == operatorNamespace {
        // List all LlamaStackDistribution resources across all namespaces
        allLlamaStacks := llamav1alpha1.LlamaStackDistributionList{}
        err = r.List(ctx, &allLlamaStacks)
        if err != nil {
            logger.Error(err, "Failed to list all LlamaStackDistributions for operator config change")
            return nil
        }
        return r.convertToReconcileRequests(allLlamaStacks)
    }

Comment on lines +811 to +821

Collaborator
I'm thinking that if there happen to be a lot of LlamaStackDistributions on a large cluster, we might want to introduce some jittering / rate limiting so there isn't an immediate thundering-herd effect. (More of an optimization than a requirement for this PR.)

Collaborator (Author)
Good idea, we could add this to the backlog.
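If that optimization is ever picked up, one option is a combined per-item backoff plus token-bucket rate limiter on the controller's workqueue. The sketch below is illustrative only: it is written against the older untyped workqueue / controller-runtime API (exact option types vary by version), and the method name and rate numbers are made up.

```go
package controllers // package name assumed for illustration

import (
    "time"

    "golang.org/x/time/rate"
    "k8s.io/client-go/util/workqueue"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/controller"

    llamav1alpha1 "github.com/llamastack/llama-stack-k8s-operator/api/v1alpha1"
)

// setupWithRateLimit caps reconcile throughput so that enqueueing every
// LlamaStackDistribution after an operator-config change drains gradually
// instead of hitting the API server all at once.
func (r *LlamaStackDistributionReconciler) setupWithRateLimit(mgr ctrl.Manager) error {
    limiter := workqueue.NewMaxOfRateLimiter(
        // per-item exponential backoff for retries
        workqueue.NewItemExponentialFailureRateLimiter(500*time.Millisecond, 60*time.Second),
        // overall token bucket: roughly 10 reconciles/s with a burst of 50
        &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 50)},
    )

    return ctrl.NewControllerManagedBy(mgr).
        For(&llamav1alpha1.LlamaStackDistribution{}).
        WithOptions(controller.Options{RateLimiter: limiter}).
        Complete(r)
}
```
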

    // Try field indexer lookup first
    attachedLlamaStacks, found := r.tryFieldIndexerLookup(ctx, configMap)
    if !found {

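For readers following along: a mapping function like findLlamaStackDistributionsForConfigMap is normally registered together with a ConfigMap watch in SetupWithManager. The wiring below is a sketch (not part of this diff), assuming controller-runtime v0.15+ handler signatures and a hypothetical setupConfigMapWatch helper.

```go
package controllers // package name assumed for illustration

import (
    corev1 "k8s.io/api/core/v1"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/builder"
    "sigs.k8s.io/controller-runtime/pkg/handler"
    "sigs.k8s.io/controller-runtime/pkg/predicate"

    llamav1alpha1 "github.com/llamastack/llama-stack-k8s-operator/api/v1alpha1"
)

// setupConfigMapWatch shows roughly how the ConfigMap watch, the update
// predicate, and the map function fit together; the operator's real
// SetupWithManager may differ.
func (r *LlamaStackDistributionReconciler) setupConfigMapWatch(mgr ctrl.Manager) error {
    return ctrl.NewControllerManagedBy(mgr).
        For(&llamav1alpha1.LlamaStackDistribution{}).
        Watches(
            &corev1.ConfigMap{},
            handler.EnqueueRequestsFromMapFunc(r.findLlamaStackDistributionsForConfigMap),
            builder.WithPredicates(predicate.Funcs{UpdateFunc: r.configMapUpdatePredicate}),
        ).
        Complete(r)
}
```
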
@@ -1632,53 +1694,103 @@ func NewLlamaStackDistributionReconciler(ctx context.Context, client client.Clie
        return nil, fmt.Errorf("failed to get operator namespace: %w", err)
    }

    // Initialize operator config ConfigMap
    configMap, err := initializeOperatorConfigMap(ctx, client, operatorNamespace)
    if err != nil {
        return nil, err
    }

    // Parse feature flags from ConfigMap
    enableNetworkPolicy, err := parseFeatureFlags(configMap.Data)
    if err != nil {
        return nil, fmt.Errorf("failed to parse feature flags: %w", err)
    }

    // Parse image mapping overrides from ConfigMap
    imageMappingOverrides := ParseImageMappingOverrides(ctx, configMap.Data)

    return &LlamaStackDistributionReconciler{
        Client:                client,
        Scheme:                scheme,
        EnableNetworkPolicy:   enableNetworkPolicy,
        ImageMappingOverrides: imageMappingOverrides,
        ClusterInfo:           clusterInfo,
        httpClient:            &http.Client{Timeout: 5 * time.Second},
    }, nil
}

// initializeOperatorConfigMap gets or creates the operator config ConfigMap.
func initializeOperatorConfigMap(ctx context.Context, c client.Client, operatorNamespace string) (*corev1.ConfigMap, error) {
    configMap := &corev1.ConfigMap{}
    configMapName := types.NamespacedName{
        Name:      operatorConfigData,
        Namespace: operatorNamespace,
    }

    err := c.Get(ctx, configMapName, configMap)
    if err == nil {
        return configMap, nil
    }

    if !k8serrors.IsNotFound(err) {
        return nil, fmt.Errorf("failed to get ConfigMap: %w", err)
    }

    // ConfigMap doesn't exist, create it with defaults
    configMap, err = createDefaultConfigMap(configMapName)
    if err != nil {
        return nil, fmt.Errorf("failed to generate default configMap: %w", err)
    }

    if err = c.Create(ctx, configMap); err != nil {
        return nil, fmt.Errorf("failed to create ConfigMap: %w", err)
    }

    return configMap, nil
}

func ParseImageMappingOverrides(ctx context.Context, configMapData map[string]string) map[string]string {
    imageMappingOverrides := make(map[string]string)
    logger := log.FromContext(ctx)

    // Look for the image-overrides key in the ConfigMap data
    if overridesYAML, exists := configMapData["image-overrides"]; exists {
        // Parse the YAML content
        var overrides map[string]string
        if err := yaml.Unmarshal([]byte(overridesYAML), &overrides); err != nil {
            // Log error but continue with empty overrides
            logger.V(1).Info("failed to parse image-overrides YAML", "error", err)
            return imageMappingOverrides
        }

        // Validate and copy the parsed overrides to our result map
        for version, image := range overrides {
            // Validate the image reference format
            if _, err := name.ParseReference(image); err != nil {
                logger.V(1).Info(
                    "skipping invalid image override",
                    "version", version,
                    "image", image,
                    "error", err,
                )
                continue
            }
            imageMappingOverrides[version] = image
        }
    }

    return imageMappingOverrides
}

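To make the expected ConfigMap data shape concrete, here is a small illustrative usage example. Only the `image-overrides` key comes from the code above; the version keys and image references are made up.

```go
package controllers // package name assumed for illustration

import (
    "context"
    "fmt"
)

// ExampleParseImageMappingOverrides feeds ParseImageMappingOverrides data shaped
// like the operator config ConfigMap: the image-overrides value is YAML mapping
// a version key to an image reference.
func ExampleParseImageMappingOverrides() {
    data := map[string]string{
        "image-overrides": `
v0.2.0: quay.io/example/llama-stack:v0.2.0-patched
v0.1.9: quay.io/example/llama-stack:v0.1.9-hotfix
broken: "not a valid image reference!"
`,
    }

    overrides := ParseImageMappingOverrides(context.Background(), data)

    // The invalid entry is dropped by the name.ParseReference check.
    fmt.Println(len(overrides), overrides["v0.2.0"])
    // Output: 2 quay.io/example/llama-stack:v0.2.0-patched
}
```
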
// NewTestReconciler creates a reconciler for testing, allowing injection of a custom http client and feature flags.
func NewTestReconciler(client client.Client, scheme *runtime.Scheme, clusterInfo *cluster.ClusterInfo,
    httpClient *http.Client, enableNetworkPolicy bool) *LlamaStackDistributionReconciler {
    return &LlamaStackDistributionReconciler{
        Client:                client,
        Scheme:                scheme,
        ClusterInfo:           clusterInfo,
        httpClient:            httpClient,
        EnableNetworkPolicy:   enableNetworkPolicy,
        ImageMappingOverrides: make(map[string]string),
    }
}
Admittedly, this is harder when the version is stored in a map's value, so my earlier comment about putting the version into a value rather than the key might make these kinds of updates harder.
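For completeness, here is a hypothetical sketch of how the reconciler could consult ImageMappingOverrides when resolving a distribution image. Neither the helper nor the defaultImages fallback map appears in this diff; both are illustrative only.

```go
package controllers // package name assumed for illustration

import "fmt"

// resolveDistributionImage is hypothetical: an override from the operator
// ConfigMap wins, otherwise fall back to the compiled-in mapping.
func (r *LlamaStackDistributionReconciler) resolveDistributionImage(version string, defaultImages map[string]string) (string, error) {
    if image, ok := r.ImageMappingOverrides[version]; ok {
        return image, nil
    }
    if image, ok := defaultImages[version]; ok {
        return image, nil
    }
    return "", fmt.Errorf("no image mapping found for version %q", version)
}
```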