Skip to content

Commit 064720c

Browse files
committed
Updated for AIM405
1 parent 8d2b125 commit 064720c

File tree

14 files changed

+3210
-1308
lines changed

14 files changed

+3210
-1308
lines changed

workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/05.01_fine-tuning-pipeline.ipynb

Lines changed: 1167 additions & 1066 deletions
Large diffs are not rendered by default.

workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/scripts/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1+
awscli==1.42.25
12
transformers==4.50.2
23
peft==0.14.0
34
accelerate==1.3.0
45
bitsandbytes==0.45.1
5-
datasets==3.2.0
6+
datasets==3.5.0
67
evaluate==0.4.3
78
huggingface_hub[hf_transfer]==0.33.4
89
mlflow

workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/__init__.py

Whitespace-only changes.
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# ### 6. Deploy Step
2+
# This step deploys the model for evaluation
3+
4+
import sagemaker
5+
import boto3
6+
from sagemaker import get_execution_role
7+
from sagemaker import Model
8+
from sagemaker.model_monitor import DataCaptureConfig
9+
import time
10+
from sagemaker.workflow.function_step import step
11+
from .pipeline_utils import PIPELINE_INSTANCE_TYPE
12+
13+
14+
@step(
    name="ModelDeploy",
    instance_type=PIPELINE_INSTANCE_TYPE,
    display_name="Model Deploy",
    keep_alive_period_in_seconds=900
)
def deploy(
    model_artifacts_s3_path: str,
    output_path: str,
    model_id: str,
):
    """Deploy the fine-tuned model to a SageMaker endpoint for evaluation.

    If an endpoint with the derived name already exists it is deleted first
    (together with the endpoint config of the same name), and the step waits
    up to five minutes for the deletion to complete before deploying again.

    Args:
        model_artifacts_s3_path: S3 location of the model artifacts, passed
            to the SageMaker ``Model`` as ``model_data``.
        output_path: Not used by this step; kept so the signature stays
            compatible with existing callers.
        model_id: Model identifier; the text after the last ``/`` is used to
            build the endpoint name.

    Returns:
        The name of the endpoint that was deployed.

    Raises:
        botocore.exceptions.ClientError: If an existing endpoint is found but
            the delete request for it is rejected.
    """
    sagemaker_session = sagemaker.Session()
    instance_count = 1
    instance_type = "ml.g5.2xlarge"
    health_check_timeout = 700

    # Endpoint name: last path segment of model_id with '.' and '_' replaced
    # by '-', plus a fixed suffix.
    endpoint_name = f"{model_id.split('/')[-1].replace('.', '-').replace('_','-')}-sft-djl"

    # Check for an existing endpoint. Only the describe call is inside the
    # try, so a failure while deleting is never misreported as "does not
    # exist".
    print(f"Checking for existing endpoint: {endpoint_name}")
    sm_client = boto3.client('sagemaker')
    try:
        sm_client.describe_endpoint(EndpointName=endpoint_name)
    except sm_client.exceptions.ClientError:
        endpoint_exists = False
        print(f"Endpoint {endpoint_name} does not exist, proceeding with deployment")
    else:
        endpoint_exists = True

    if endpoint_exists:
        print(f"Endpoint {endpoint_name} exists, deleting it before deployment")
        # Let a rejected delete surface here: deploying over an endpoint that
        # still exists would only fail later with a less specific error.
        sm_client.delete_endpoint(EndpointName=endpoint_name)

        # The endpoint config is removed on a best-effort basis; a missing
        # config must not prevent waiting for the endpoint deletion below.
        print(f"Deleting endpoint config {endpoint_name}")
        try:
            sm_client.delete_endpoint_config(EndpointConfigName=endpoint_name)
        except sm_client.exceptions.ClientError as err:
            print(f"Could not delete endpoint config {endpoint_name}: {err}")

        # Wait for endpoint to be fully deleted
        print("Waiting for endpoint to be fully deleted...")
        wait_seconds = 10
        total_wait_time = 0
        max_wait_time = 300  # 5 minutes maximum wait
        endpoint_deleted = False

        while total_wait_time < max_wait_time:
            try:
                sm_client.describe_endpoint(EndpointName=endpoint_name)
            except sm_client.exceptions.ClientError:
                # describe_endpoint failing is taken to mean the endpoint is gone.
                print(f"Endpoint {endpoint_name} successfully deleted")
                endpoint_deleted = True
                break
            print(f"Endpoint still exists, waiting {wait_seconds} seconds...")
            time.sleep(wait_seconds)
            total_wait_time += wait_seconds

        if not endpoint_deleted:
            print(f"Warning: Endpoint still exists after {max_wait_time} seconds")

    # Continue with model deployment
    image_uri = sagemaker.image_uris.retrieve(
        framework="djl-lmi",
        region=sagemaker_session.boto_session.region_name,
        version="latest"
    )

    # Create model only once
    model = Model(
        image_uri=image_uri,
        model_data=model_artifacts_s3_path,
        role=get_execution_role(),
        env={
            'HF_MODEL_ID': "/opt/ml/model",  # path to where sagemaker stores the model
            'OPTION_TRUST_REMOTE_CODE': 'true',
            'OPTION_ROLLING_BATCH': "vllm",
            'OPTION_DTYPE': 'bf16',
            'OPTION_QUANTIZE': 'fp8',
            'OPTION_TENSOR_PARALLEL_DEGREE': 'max',
            'OPTION_MAX_ROLLING_BATCH_SIZE': '32',
            'OPTION_MODEL_LOADING_TIMEOUT': '3600',
            'OPTION_MAX_MODEL_LEN': '4096'
        }
    )

    print(f"deploying endpoint: {endpoint_name}")

    # Capture requests and responses into the session's default bucket rather
    # than a bucket tied to one specific account and region.
    data_capture_s3_uri = f"s3://{sagemaker_session.default_bucket()}/data-capture/"
    data_capture_config = DataCaptureConfig(
        enable_capture=True,
        sampling_percentage=100,
        destination_s3_uri=data_capture_s3_uri,
        capture_options=["REQUEST", "RESPONSE"],
        csv_content_types=["text/csv"],
        json_content_types=["application/json"]
    )

    model.deploy(
        endpoint_name=endpoint_name,
        initial_instance_count=instance_count,
        instance_type=instance_type,
        container_startup_health_check_timeout=health_check_timeout,
        model_data_download_timeout=3600,
        data_capture_config=data_capture_config
    )

    return endpoint_name

workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/evaluation_mlflow.py

Lines changed: 0 additions & 61 deletions
This file was deleted.

workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/finetune_llama3b_hf.py

Lines changed: 0 additions & 96 deletions
This file was deleted.

0 commit comments

Comments
 (0)