Skip to content

Commit 2762ba0

Browse files
authored
Updated to latest DJL LMI image
1 parent 3cb2384 commit 2762ba0

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

workshops/diy-agents-with-sagemaker-and-bedrock/0-setup/2-setup-sagemaker-endpoint.ipynb

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,18 +44,21 @@
4444
"endpoint_name = name_from_base(model_id.split(\"/\")[1]+\"-ep\")\n",
4545
"model = DJLModel(\n",
4646
" name=model_name, role=role,\n",
47-
" image_uri=f\"763104351884.dkr.ecr.{boto3.Session().region_name}.amazonaws.com/djl-inference:0.33.0-lmi15.0.0-cu128-v1.3\",\n",
47+
" image_uri=f\"763104351884.dkr.ecr.{boto3.Session().region_name}.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128-v1.2\",\n",
4848
" env={\n",
49-
" \"HF_MODEL_ID\": model_id, # config: https://qwen.readthedocs.io/en/latest/framework/function_call.html#vllm \n",
50-
" \"OPTION_MAX_MODEL_LEN\": f\"{1024*20}\",\n",
49+
" \"HF_MODEL_ID\": model_id,\n",
50+
" \"OPTION_MAX_MODEL_LEN\": f\"{1024*16}\",\n",
5151
" \"OPTION_QUANTIZE\": \"fp8\",\n",
52-
" # vllm serve {model_id} --enable-auto-tool-choice --tool-call-parser hermes\n",
5352
" \"OPTION_ROLLING_BATCH\": \"vllm\",\n",
53+
" \"OPTION_DTYPE\": \"bf16\",\n",
54+
" \"SERVING_FAIL_FAST\": \"true\",\n",
55+
" \"OPTION_ROLLING_BATCH\": \"disable\",\n",
56+
" \"OPTION_ASYNC_MODE\": \"true\",\n",
57+
" \"OPTION_ENTRYPOINT\": \"djl_python.lmi_vllm.vllm_async_service\",\n",
5458
" \"OPTION_ENABLE_AUTO_TOOL_CHOICE\": \"true\",\n",
5559
" \"OPTION_TOOL_CALL_PARSER\": \"hermes\",\n",
56-
" # --enable-reasoning --reasoning-parser deepseek_r1\n",
57-
" # \"OPTION_ENABLE_REASONING\": \"true\",\n",
58-
" # \"OPTION_REASONING_PARSER\": \"qwen3\", # currently not available in djl lmi15\n",
60+
" \"OPTION_ENABLE_REASONING\": \"true\",\n",
61+
" \"OPTION_REASONING_PARSER\": \"qwen3\",\n",
5962
" }\n",
6063
")\n",
6164
"model.deploy(\n",

0 commit comments

Comments
 (0)