Skip to content

Commit eee2a8a

Browse files
Merge branch 'aws-samples:main' into main
2 parents 6ae451b + 95cf301 commit eee2a8a

File tree

1 file changed

+60
-13
lines changed

1 file changed

+60
-13
lines changed

workshops/diy-agents-with-sagemaker-and-bedrock/0-setup/2-setup-sagemaker-endpoint.ipynb

Lines changed: 60 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,36 @@
2626
"cell_type": "code",
2727
"execution_count": null,
2828
"id": "fd08268e",
29-
"metadata": {},
30-
"outputs": [],
29+
"metadata": {
30+
"execution": {
31+
"iopub.execute_input": "2025-10-15T17:49:49.105299Z",
32+
"iopub.status.busy": "2025-10-15T17:49:49.105048Z",
33+
"iopub.status.idle": "2025-10-15T18:01:24.859971Z",
34+
"shell.execute_reply": "2025-10-15T18:01:24.859446Z",
35+
"shell.execute_reply.started": "2025-10-15T17:49:49.105277Z"
36+
}
37+
},
38+
"outputs": [
39+
{
40+
"name": "stdout",
41+
"output_type": "stream",
42+
"text": [
43+
"sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml\n",
44+
"sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml\n",
45+
"----------------------!"
46+
]
47+
},
48+
{
49+
"data": {
50+
"text/plain": [
51+
"<sagemaker.djl_inference.djl_predictor.DJLPredictor at 0x7f5e46e66d50>"
52+
]
53+
},
54+
"execution_count": 1,
55+
"metadata": {},
56+
"output_type": "execute_result"
57+
}
58+
],
3159
"source": [
3260
"from sagemaker.djl_inference import DJLModel\n",
3361
"from sagemaker.utils import name_from_base\n",
@@ -44,18 +72,20 @@
4472
"endpoint_name = name_from_base(model_id.split(\"/\")[1]+\"-ep\")\n",
4573
"model = DJLModel(\n",
4674
" name=model_name, role=role,\n",
47-
" image_uri=f\"763104351884.dkr.ecr.{boto3.Session().region_name}.amazonaws.com/djl-inference:0.33.0-lmi15.0.0-cu128-v1.3\",\n",
75+
" image_uri=f\"763104351884.dkr.ecr.{boto3.Session().region_name}.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128-v1.2\",\n",
4876
" env={\n",
49-
" \"HF_MODEL_ID\": model_id, # config: https://qwen.readthedocs.io/en/latest/framework/function_call.html#vllm \n",
50-
" \"OPTION_MAX_MODEL_LEN\": f\"{1024*20}\",\n",
77+
" \"HF_MODEL_ID\": model_id,\n",
78+
" \"OPTION_MAX_MODEL_LEN\": f\"{1024*16}\",\n",
5179
" \"OPTION_QUANTIZE\": \"fp8\",\n",
52-
" # vllm serve {model_id} --enable-auto-tool-choice --tool-call-parser hermes\n",
53-
" \"OPTION_ROLLING_BATCH\": \"vllm\",\n",
80+
" 'OPTION_DTYPE': 'bf16',\n",
81+
" 'SERVING_FAIL_FAST': 'true',\n",
82+
" 'OPTION_ROLLING_BATCH': 'disable',\n",
83+
" 'OPTION_ASYNC_MODE': 'true',\n",
84+
" 'OPTION_ENTRYPOINT': 'djl_python.lmi_vllm.vllm_async_service',\n",
5485
" \"OPTION_ENABLE_AUTO_TOOL_CHOICE\": \"true\",\n",
5586
" \"OPTION_TOOL_CALL_PARSER\": \"hermes\",\n",
56-
" # --enable-reasoning --reasoning-parser deepseek_r1\n",
57-
" # \"OPTION_ENABLE_REASONING\": \"true\",\n",
58-
" # \"OPTION_REASONING_PARSER\": \"qwen3\", # currently not available in djl lmi15\n",
87+
" \"OPTION_ENABLE_REASONING\": \"true\",\n",
88+
" \"OPTION_REASONING_PARSER\": \"qwen3\",\n",
5989
" }\n",
6090
")\n",
6191
"model.deploy(\n",
@@ -67,10 +97,27 @@
6797
},
6898
{
6999
"cell_type": "code",
70-
"execution_count": null,
100+
"execution_count": 2,
71101
"id": "18e6de49-11f7-4e36-b7bb-322282a51e53",
72-
"metadata": {},
73-
"outputs": [],
102+
"metadata": {
103+
"execution": {
104+
"iopub.execute_input": "2025-10-15T18:01:24.860899Z",
105+
"iopub.status.busy": "2025-10-15T18:01:24.860666Z",
106+
"iopub.status.idle": "2025-10-15T18:01:24.864407Z",
107+
"shell.execute_reply": "2025-10-15T18:01:24.863955Z",
108+
"shell.execute_reply.started": "2025-10-15T18:01:24.860877Z"
109+
}
110+
},
111+
"outputs": [
112+
{
113+
"name": "stdout",
114+
"output_type": "stream",
115+
"text": [
116+
"Endpoint name: Qwen3-4B-ep-2025-10-15-17-49-51-128\n",
117+
"Stored 'SAGEMAKER_ENDPOINT_NAME' (str)\n"
118+
]
119+
}
120+
],
74121
"source": [
75122
"SAGEMAKER_ENDPOINT_NAME = model.endpoint_name\n",
76123
"print(f\"Endpoint name: {SAGEMAKER_ENDPOINT_NAME}\")\n",

0 commit comments

Comments
 (0)