7 changes: 6 additions & 1 deletion api/v1alpha1/llamastackdistribution_types.go

Some generated files are not rendered by default.
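The field addition itself is not rendered above. Inferred from the generated CRD schema and the tests further down, the new `ServerSpec` member presumably looks something like this sketch (the kubebuilder markers and comment placement are assumptions, not confirmed by the rendered diff):

```go
type ServerSpec struct {
	// ... existing fields elided ...

	// Workers configures the number of uvicorn worker processes to run.
	// When set, the operator will launch llama-stack using uvicorn with
	// the specified worker count.
	// +kubebuilder:validation:Minimum=1
	// +optional
	Workers *int32 `json:"workers,omitempty"`
}
```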

5 changes: 5 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default.
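This generated change is collapsed as well. For a new `*int32` field, controller-gen's deepcopy addition is conventionally the five lines sketched here (an assumption; the actual hunk is not shown):

```go
// Sketch of the likely addition inside ServerSpec's generated DeepCopyInto:
if in.Workers != nil {
	in, out := &in.Workers, &out.Workers
	*out = new(int32)
	**out = **in
}
```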

7 changes: 7 additions & 0 deletions config/crd/bases/llamastack.io_llamastackdistributions.yaml
@@ -2568,6 +2568,13 @@ spec:
      required:
      - configMapName
      type: object
    workers:
      description: |-
        Workers configures the number of uvicorn worker processes to run.
        When set, the operator will launch llama-stack using uvicorn with the specified worker count.
      format: int32
      minimum: 1
      type: integer
  required:
  - distribution
  type: object
1 change: 1 addition & 0 deletions config/samples/_v1alpha1_llamastackdistribution.yaml
@@ -14,6 +14,7 @@ spec:
  name: llama-stack
  distribution:
    name: starter
  workers: 2
  podDisruptionBudget:
    minAvailable: 1
  topologySpreadConstraints:
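The sample above covers YAML; for Go clients, setting the same knob through the operator's API types (the type and field names appear verbatim in the tests below) might look like this minimal sketch:

```go
package main

import (
	"fmt"

	llamav1alpha1 "github.com/llamastack/llama-stack-k8s-operator/api/v1alpha1"
)

func main() {
	// Workers is a *int32, so nil ("unset", which defaults to 1 worker)
	// stays distinguishable from an explicit value. A real object also
	// needs the required distribution field, elided here.
	workers := int32(2)
	lsd := llamav1alpha1.LlamaStackDistribution{
		Spec: llamav1alpha1.LlamaStackDistributionSpec{
			Server: llamav1alpha1.ServerSpec{
				Workers: &workers,
			},
		},
	}
	fmt.Println(*lsd.Spec.Server.Workers) // prints 2
}
```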
33 changes: 30 additions & 3 deletions controllers/resource_helper.go
@@ -21,6 +21,7 @@ import (
"errors"
"fmt"
"regexp"
"strconv"
"strings"

llamav1alpha1 "github.com/llamastack/llama-stack-k8s-operator/api/v1alpha1"
@@ -91,21 +92,26 @@ try:
        print('Using core module path (llama_stack.core.server.server)', file=sys.stderr)
        print(1)
    else:
-       print('Using new CLI command (llama stack run)', file=sys.stderr)
+       print('Using uvicorn CLI command', file=sys.stderr)
        print(2)
except Exception as e:
    print(f'Version detection failed, defaulting to new CLI: {e}', file=sys.stderr)
    print(2)
")
PORT=${LLS_PORT:-8321}
WORKERS=${LLS_WORKERS:-1}
# Execute the appropriate CLI based on version
case $VERSION_CODE in
  0) python3 -m llama_stack.distribution.server.server --config /etc/llama-stack/run.yaml ;;
  1) python3 -m llama_stack.core.server.server /etc/llama-stack/run.yaml ;;
-  2) llama stack run /etc/llama-stack/run.yaml ;;
-  *) echo "Invalid version code: $VERSION_CODE, using new CLI"; llama stack run /etc/llama-stack/run.yaml ;;
+  2) exec uvicorn llama_stack.core.server.server:create_app --host 0.0.0.0 --port "$PORT" --workers "$WORKERS" --factory ;;
+  *) exec uvicorn llama_stack.core.server.server:create_app --host 0.0.0.0 --port "$PORT" --workers "$WORKERS" --factory ;;
esac`

const llamaStackConfigPath = "/etc/llama-stack/run.yaml"

// validateConfigMapKeys validates that all ConfigMap keys contain only safe characters.
// Note: This function validates key names only. PEM content validation is performed
// separately in the controller's reconcileCABundleConfigMap function.
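Two notes on the entrypoint script in the hunk above. uvicorn's `--factory` flag treats the import string as a zero-argument callable returning the ASGI app, so each worker process calls `llama_stack.core.server.server:create_app` to build its own application instance. And the script constant is attached to the container somewhere outside the hunks shown; a hypothetical sketch of that wiring (the names `buildServerContainer` and `entrypointScript`, and the package clause, are illustrative, not from this diff):

```go
package controllers

import (
	llamav1alpha1 "github.com/llamastack/llama-stack-k8s-operator/api/v1alpha1"
	corev1 "k8s.io/api/core/v1"
)

// buildServerContainer is a hypothetical sketch; the real assembly lives
// elsewhere in resource_helper.go and may differ. It shows how a script
// constant typically becomes the container entrypoint.
func buildServerContainer(image, entrypointScript string) corev1.Container {
	return corev1.Container{
		Name:    llamav1alpha1.DefaultContainerName,
		Image:   image,
		Command: []string{"/bin/sh", "-c", entrypointScript},
	}
}
```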
@@ -227,6 +233,27 @@ func configureContainerEnvironment(ctx context.Context, r *LlamaStackDistributio
		})
	}

	// Always provide worker/port/config env for uvicorn; workers default to 1 when unspecified.
	workers := instance.Spec.Server.Workers
	if workers == nil {
		defaultWorkers := int32(1)
		workers = &defaultWorkers
	}
	container.Env = append(container.Env,
		corev1.EnvVar{
			Name:  "LLS_WORKERS",
			Value: strconv.Itoa(int(*workers)),
		},
		corev1.EnvVar{
			Name:  "LLS_PORT",
			Value: strconv.Itoa(int(getContainerPort(instance))),
		},
		corev1.EnvVar{
			Name:  "LLAMA_STACK_CONFIG",
			Value: llamaStackConfigPath,
		},
	)

	// Finally, add the user provided env vars
	container.Env = append(container.Env, instance.Spec.Server.ContainerSpec.Env...)
}
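`getContainerPort` is called above but is not part of this diff. Judging from the test expectations below — LLS_PORT is "8321" by default and "9000" once a custom container port is configured — its shape is presumably close to this sketch (the `ContainerSpec.Port` field name is an assumption):

```go
// Presumed shape of the existing helper (not part of this diff): prefer a
// user-configured container port, otherwise fall back to the default.
func getContainerPort(instance *llamav1alpha1.LlamaStackDistribution) int32 {
	if instance.Spec.Server.ContainerSpec.Port != 0 {
		return instance.Spec.Server.ContainerSpec.Port
	}
	return llamav1alpha1.DefaultServerPort
}
```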
49 changes: 49 additions & 0 deletions controllers/resource_helper_test.go
@@ -32,6 +32,10 @@ import (
"k8s.io/apimachinery/pkg/util/intstr"
)

func int32Ptr(val int32) *int32 {
return &val
}

func TestBuildContainerSpec(t *testing.T) {
testCases := []struct {
name string
Expand Down Expand Up @@ -66,6 +70,9 @@ func TestBuildContainerSpec(t *testing.T) {
				}},
				Env: []corev1.EnvVar{
					{Name: "HF_HOME", Value: "/.llama"},
					{Name: "LLS_WORKERS", Value: "1"},
					{Name: "LLS_PORT", Value: "8321"},
					{Name: "LLAMA_STACK_CONFIG", Value: "/etc/llama-stack/run.yaml"},
				},
			},
		},
Expand Down Expand Up @@ -111,6 +118,9 @@ func TestBuildContainerSpec(t *testing.T) {
				},
				Env: []corev1.EnvVar{
					{Name: "HF_HOME", Value: "/custom/path"},
					{Name: "LLS_WORKERS", Value: "1"},
					{Name: "LLS_PORT", Value: "9000"},
					{Name: "LLAMA_STACK_CONFIG", Value: "/etc/llama-stack/run.yaml"},
					{Name: "TEST_ENV", Value: "test-value"},
				},
				VolumeMounts: []corev1.VolumeMount{{
Expand Down Expand Up @@ -152,7 +162,43 @@ func TestBuildContainerSpec(t *testing.T) {
				}},
				Env: []corev1.EnvVar{
					{Name: "HF_HOME", Value: "/.llama"},
					{Name: "LLS_WORKERS", Value: "1"},
					{Name: "LLS_PORT", Value: "8321"},
					{Name: "LLAMA_STACK_CONFIG", Value: "/etc/llama-stack/run.yaml"},
				},
			},
		},
		{
			name: "uvicorn workers configured",
			instance: &llamav1alpha1.LlamaStackDistribution{
				Spec: llamav1alpha1.LlamaStackDistributionSpec{
					Server: llamav1alpha1.ServerSpec{
						Workers: int32Ptr(4),
					},
				},
			},
			image: "test-image:latest",
			expectedResult: corev1.Container{
				Name:  llamav1alpha1.DefaultContainerName,
				Image: "test-image:latest",
				Resources: corev1.ResourceRequirements{
					Requests: corev1.ResourceList{
						corev1.ResourceCPU:    llamav1alpha1.DefaultServerCPURequest,
						corev1.ResourceMemory: llamav1alpha1.DefaultServerMemoryRequest,
					},
				},
				Ports:        []corev1.ContainerPort{{ContainerPort: llamav1alpha1.DefaultServerPort}},
				StartupProbe: newDefaultStartupProbe(llamav1alpha1.DefaultServerPort),
				Env: []corev1.EnvVar{
					{Name: "HF_HOME", Value: "/.llama"},
					{Name: "LLS_WORKERS", Value: "4"},
					{Name: "LLS_PORT", Value: "8321"},
					{Name: "LLAMA_STACK_CONFIG", Value: "/etc/llama-stack/run.yaml"},
				},
				VolumeMounts: []corev1.VolumeMount{{
					Name:      "lls-storage",
					MountPath: llamav1alpha1.DefaultMountPath,
				}},
			},
		},
		{
Expand Down Expand Up @@ -187,6 +233,9 @@ func TestBuildContainerSpec(t *testing.T) {
				Args: []string{},
				Env: []corev1.EnvVar{
					{Name: "HF_HOME", Value: llamav1alpha1.DefaultMountPath},
					{Name: "LLS_WORKERS", Value: "1"},
					{Name: "LLS_PORT", Value: "8321"},
					{Name: "LLAMA_STACK_CONFIG", Value: "/etc/llama-stack/run.yaml"},
				},
				VolumeMounts: []corev1.VolumeMount{
					{
1 change: 1 addition & 0 deletions docs/api-overview.md
@@ -218,6 +218,7 @@ _Appears in:_
| --- | --- | --- | --- |
| `distribution` _[DistributionType](#distributiontype)_ | | | |
| `containerSpec` _[ContainerSpec](#containerspec)_ | | | |
| `workers` _integer_ | Workers configures the number of uvicorn worker processes to run.<br />When set, the operator will launch llama-stack using uvicorn with the specified worker count. | | Minimum: 1 <br /> |
| `podOverrides` _[PodOverrides](#podoverrides)_ | | | |
| `podDisruptionBudget` _[PodDisruptionBudgetSpec](#poddisruptionbudgetspec)_ | PodDisruptionBudget controls voluntary disruption tolerance for the server pods | | |
| `topologySpreadConstraints` _[TopologySpreadConstraint](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#topologyspreadconstraint-v1-core) array_ | TopologySpreadConstraints defines fine-grained spreading rules | | |
7 changes: 7 additions & 0 deletions release/operator.yaml
@@ -2577,6 +2577,13 @@ spec:
      required:
      - configMapName
      type: object
    workers:
      description: |-
        Workers configures the number of uvicorn worker processes to run.
        When set, the operator will launch llama-stack using uvicorn with the specified worker count.
      format: int32
      minimum: 1
      type: integer
  required:
  - distribution
  type: object