Skip to content

Commit 5c03fe0

Browse files
committed
feat: Add support for uvicorn workers for llama-stack
Signed-off-by: Vaishnavi Hire <vhire@redhat.com>
1 parent 16feef0 commit 5c03fe0

File tree

8 files changed

+106
-4
lines changed

8 files changed

+106
-4
lines changed

api/v1alpha1/llamastackdistribution_types.go

Lines changed: 6 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/llamastack.io_llamastackdistributions.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2568,6 +2568,13 @@ spec:
25682568
required:
25692569
- configMapName
25702570
type: object
2571+
workers:
2572+
description: |-
2573+
Workers configures the number of uvicorn worker processes to run.
2574+
When set, the operator will launch llama-stack using uvicorn with the specified worker count.
2575+
format: int32
2576+
minimum: 1
2577+
type: integer
25712578
required:
25722579
- distribution
25732580
type: object

config/samples/_v1alpha1_llamastackdistribution.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ spec:
1414
name: llama-stack
1515
distribution:
1616
name: starter
17+
workers: 2
1718
podDisruptionBudget:
1819
minAvailable: 1
1920
topologySpreadConstraints:

controllers/resource_helper.go

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"errors"
2222
"fmt"
2323
"regexp"
24+
"strconv"
2425
"strings"
2526

2627
llamav1alpha1 "github.com/llamastack/llama-stack-k8s-operator/api/v1alpha1"
@@ -91,21 +92,26 @@ try:
9192
print('Using core module path (llama_stack.core.server.server)', file=sys.stderr)
9293
print(1)
9394
else:
94-
print('Using new CLI command (llama stack run)', file=sys.stderr)
95+
print('Using uvicorn CLI command', file=sys.stderr)
9596
print(2)
9697
except Exception as e:
9798
print(f'Version detection failed, defaulting to new CLI: {e}', file=sys.stderr)
9899
print(2)
99100
")
100101
102+
PORT=${LLS_PORT:-8321}
103+
WORKERS=${LLS_WORKERS:-1}
104+
101105
# Execute the appropriate CLI based on version
102106
case $VERSION_CODE in
103107
0) python3 -m llama_stack.distribution.server.server --config /etc/llama-stack/run.yaml ;;
104108
1) python3 -m llama_stack.core.server.server /etc/llama-stack/run.yaml ;;
105-
2) llama stack run /etc/llama-stack/run.yaml ;;
106-
*) echo "Invalid version code: $VERSION_CODE, using new CLI"; llama stack run /etc/llama-stack/run.yaml ;;
109+
2) exec uvicorn llama_stack.core.server.server:create_app --host 0.0.0.0 --port "$PORT" --workers "$WORKERS" --factory ;;
110+
*) exec uvicorn llama_stack.core.server.server:create_app --host 0.0.0.0 --port "$PORT" --workers "$WORKERS" --factory ;;
107111
esac`
108112

113+
const llamaStackConfigPath = "/etc/llama-stack/run.yaml"
114+
109115
// validateConfigMapKeys validates that all ConfigMap keys contain only safe characters.
110116
// Note: This function validates key names only. PEM content validation is performed
111117
// separately in the controller's reconcileCABundleConfigMap function.
@@ -227,6 +233,27 @@ func configureContainerEnvironment(ctx context.Context, r *LlamaStackDistributio
227233
})
228234
}
229235

236+
// Always provide worker/port/config env for uvicorn; workers default to 1 when unspecified.
237+
workers := instance.Spec.Server.Workers
238+
if workers == nil {
239+
defaultWorkers := int32(1)
240+
workers = &defaultWorkers
241+
}
242+
container.Env = append(container.Env,
243+
corev1.EnvVar{
244+
Name: "LLS_WORKERS",
245+
Value: strconv.Itoa(int(*workers)),
246+
},
247+
corev1.EnvVar{
248+
Name: "LLS_PORT",
249+
Value: strconv.Itoa(int(getContainerPort(instance))),
250+
},
251+
corev1.EnvVar{
252+
Name: "LLAMA_STACK_CONFIG",
253+
Value: llamaStackConfigPath,
254+
},
255+
)
256+
230257
// Finally, add the user provided env vars
231258
container.Env = append(container.Env, instance.Spec.Server.ContainerSpec.Env...)
232259
}

controllers/resource_helper_test.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ import (
3232
"k8s.io/apimachinery/pkg/util/intstr"
3333
)
3434

35+
func int32Ptr(val int32) *int32 {
36+
return &val
37+
}
38+
3539
func TestBuildContainerSpec(t *testing.T) {
3640
testCases := []struct {
3741
name string
@@ -66,6 +70,9 @@ func TestBuildContainerSpec(t *testing.T) {
6670
}},
6771
Env: []corev1.EnvVar{
6872
{Name: "HF_HOME", Value: "/.llama"},
73+
{Name: "LLS_WORKERS", Value: "1"},
74+
{Name: "LLS_PORT", Value: "8321"},
75+
{Name: "LLAMA_STACK_CONFIG", Value: "/etc/llama-stack/run.yaml"},
6976
},
7077
},
7178
},
@@ -111,6 +118,9 @@ func TestBuildContainerSpec(t *testing.T) {
111118
},
112119
Env: []corev1.EnvVar{
113120
{Name: "HF_HOME", Value: "/custom/path"},
121+
{Name: "LLS_WORKERS", Value: "1"},
122+
{Name: "LLS_PORT", Value: "9000"},
123+
{Name: "LLAMA_STACK_CONFIG", Value: "/etc/llama-stack/run.yaml"},
114124
{Name: "TEST_ENV", Value: "test-value"},
115125
},
116126
VolumeMounts: []corev1.VolumeMount{{
@@ -152,7 +162,43 @@ func TestBuildContainerSpec(t *testing.T) {
152162
}},
153163
Env: []corev1.EnvVar{
154164
{Name: "HF_HOME", Value: "/.llama"},
165+
{Name: "LLS_WORKERS", Value: "1"},
166+
{Name: "LLS_PORT", Value: "8321"},
167+
{Name: "LLAMA_STACK_CONFIG", Value: "/etc/llama-stack/run.yaml"},
168+
},
169+
},
170+
},
171+
{
172+
name: "uvicorn workers configured",
173+
instance: &llamav1alpha1.LlamaStackDistribution{
174+
Spec: llamav1alpha1.LlamaStackDistributionSpec{
175+
Server: llamav1alpha1.ServerSpec{
176+
Workers: int32Ptr(4),
177+
},
178+
},
179+
},
180+
image: "test-image:latest",
181+
expectedResult: corev1.Container{
182+
Name: llamav1alpha1.DefaultContainerName,
183+
Image: "test-image:latest",
184+
Resources: corev1.ResourceRequirements{
185+
Requests: corev1.ResourceList{
186+
corev1.ResourceCPU: llamav1alpha1.DefaultServerCPURequest,
187+
corev1.ResourceMemory: llamav1alpha1.DefaultServerMemoryRequest,
188+
},
189+
},
190+
Ports: []corev1.ContainerPort{{ContainerPort: llamav1alpha1.DefaultServerPort}},
191+
StartupProbe: newDefaultStartupProbe(llamav1alpha1.DefaultServerPort),
192+
Env: []corev1.EnvVar{
193+
{Name: "HF_HOME", Value: "/.llama"},
194+
{Name: "LLS_WORKERS", Value: "4"},
195+
{Name: "LLS_PORT", Value: "8321"},
196+
{Name: "LLAMA_STACK_CONFIG", Value: "/etc/llama-stack/run.yaml"},
155197
},
198+
VolumeMounts: []corev1.VolumeMount{{
199+
Name: "lls-storage",
200+
MountPath: llamav1alpha1.DefaultMountPath,
201+
}},
156202
},
157203
},
158204
{
@@ -187,6 +233,9 @@ func TestBuildContainerSpec(t *testing.T) {
187233
Args: []string{},
188234
Env: []corev1.EnvVar{
189235
{Name: "HF_HOME", Value: llamav1alpha1.DefaultMountPath},
236+
{Name: "LLS_WORKERS", Value: "1"},
237+
{Name: "LLS_PORT", Value: "8321"},
238+
{Name: "LLAMA_STACK_CONFIG", Value: "/etc/llama-stack/run.yaml"},
190239
},
191240
VolumeMounts: []corev1.VolumeMount{
192241
{

docs/api-overview.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ _Appears in:_
218218
| --- | --- | --- | --- |
219219
| `distribution` _[DistributionType](#distributiontype)_ | | | |
220220
| `containerSpec` _[ContainerSpec](#containerspec)_ | | | |
221+
| `workers` _integer_ | Workers configures the number of uvicorn worker processes to run.<br />When set, the operator will launch llama-stack using uvicorn with the specified worker count. | | Minimum: 1 <br /> |
221222
| `podOverrides` _[PodOverrides](#podoverrides)_ | | | |
222223
| `podDisruptionBudget` _[PodDisruptionBudgetSpec](#poddisruptionbudgetspec)_ | PodDisruptionBudget controls voluntary disruption tolerance for the server pods | | |
223224
| `topologySpreadConstraints` _[TopologySpreadConstraint](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#topologyspreadconstraint-v1-core) array_ | TopologySpreadConstraints defines fine-grained spreading rules | | |

release/operator.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2577,6 +2577,13 @@ spec:
25772577
required:
25782578
- configMapName
25792579
type: object
2580+
workers:
2581+
description: |-
2582+
Workers configures the number of uvicorn worker processes to run.
2583+
When set, the operator will launch llama-stack using uvicorn with the specified worker count.
2584+
format: int32
2585+
minimum: 1
2586+
type: integer
25802587
required:
25812588
- distribution
25822589
type: object

0 commit comments

Comments (0)