1+ # ### 6. Deploy Step
2+ # This step deploys the model for evaluation
3+
4+ import sagemaker
5+ import boto3
6+ from sagemaker import get_execution_role
7+ from sagemaker import Model
8+ from sagemaker .model_monitor import DataCaptureConfig
9+ import time
10+ from sagemaker .workflow .function_step import step
11+ from .pipeline_utils import PIPELINE_INSTANCE_TYPE
12+
13+
@step(
    name="ModelDeploy",
    instance_type=PIPELINE_INSTANCE_TYPE,
    display_name="Model Deploy",
    keep_alive_period_in_seconds=900,
)
def deploy(
    model_artifacts_s3_path: str,
    output_path: str,
    model_id: str,
) -> str:
    """Deploy the model artifacts to a SageMaker real-time endpoint.

    The endpoint name is derived from the last path component of
    ``model_id`` (``.`` and ``_`` replaced by ``-``) with a ``-sft-djl``
    suffix. Any endpoint or endpoint config already using that name is
    deleted first, then a DJL-LMI container is deployed on a single
    ``ml.g5.2xlarge`` instance with request/response data capture enabled.

    Args:
        model_artifacts_s3_path: S3 location of the model artifacts, passed
            to the SageMaker ``Model`` as ``model_data``.
        output_path: Not used by this step; kept so the step signature
            stays compatible with existing callers.
        model_id: Model identifier used to build the endpoint name.

    Returns:
        The name of the deployed endpoint.

    Raises:
        botocore.exceptions.ClientError: If SageMaker rejects a
            describe/delete call for any reason other than the resource
            not existing (e.g. throttling or missing permissions).
    """
    sagemaker_session = sagemaker.Session()
    instance_count = 1
    instance_type = "ml.g5.2xlarge"
    health_check_timeout = 700

    # Get the name for the endpoint
    endpoint_name = (
        f"{model_id.split('/')[-1].replace('.', '-').replace('_', '-')}-sft-djl"
    )

    sm_client = boto3.client("sagemaker")

    def _is_not_found(error) -> bool:
        # SageMaker reports a missing endpoint / endpoint config as a
        # ValidationException; anything else (throttling, access denied, ...)
        # is a real failure and must not be mistaken for "does not exist".
        return error.response.get("Error", {}).get("Code") == "ValidationException"

    def _endpoint_exists() -> bool:
        try:
            sm_client.describe_endpoint(EndpointName=endpoint_name)
        except sm_client.exceptions.ClientError as error:
            if _is_not_found(error):
                return False
            raise
        return True

    # Delete existing endpoint if it exists
    print(f"Checking for existing endpoint: {endpoint_name}")
    endpoint_was_present = _endpoint_exists()
    if endpoint_was_present:
        print(f"Endpoint {endpoint_name} exists, deleting it before deployment")
        sm_client.delete_endpoint(EndpointName=endpoint_name)
    else:
        print(f"Endpoint {endpoint_name} does not exist, proceeding with deployment")

    # The endpoint config is removed independently of the endpoint: a stale
    # config with the same name would block the new deployment, and a missing
    # config must not abort the wait for the endpoint deletion below.
    print(f"Deleting endpoint config {endpoint_name}")
    try:
        sm_client.delete_endpoint_config(EndpointConfigName=endpoint_name)
    except sm_client.exceptions.ClientError as error:
        if not _is_not_found(error):
            raise
        print(f"Endpoint config {endpoint_name} does not exist, nothing to delete")

    if endpoint_was_present:
        # Wait for endpoint to be fully deleted
        print("Waiting for endpoint to be fully deleted...")
        wait_seconds = 10
        max_wait_time = 300  # 5 minutes maximum wait
        total_wait_time = 0

        while _endpoint_exists():
            if total_wait_time >= max_wait_time:
                # Best effort: warn and let the deployment attempt proceed.
                print(f"Warning: Endpoint still exists after {max_wait_time} seconds")
                break
            print(f"Endpoint still exists, waiting {wait_seconds} seconds...")
            time.sleep(wait_seconds)
            total_wait_time += wait_seconds
        else:
            print(f"Endpoint {endpoint_name} successfully deleted")

    # Continue with model deployment
    image_uri = sagemaker.image_uris.retrieve(
        framework="djl-lmi",
        region=sagemaker_session.boto_session.region_name,
        version="latest",
    )

    # Create model only once
    model = Model(
        image_uri=image_uri,
        model_data=model_artifacts_s3_path,
        role=get_execution_role(),
        env={
            "HF_MODEL_ID": "/opt/ml/model",  # path to where sagemaker stores the model
            "OPTION_TRUST_REMOTE_CODE": "true",
            "OPTION_ROLLING_BATCH": "vllm",
            "OPTION_DTYPE": "bf16",
            "OPTION_QUANTIZE": "fp8",
            "OPTION_TENSOR_PARALLEL_DEGREE": "max",
            "OPTION_MAX_ROLLING_BATCH_SIZE": "32",
            "OPTION_MODEL_LOADING_TIMEOUT": "3600",
            "OPTION_MAX_MODEL_LEN": "4096",
        },
    )

    print(f"deploying endpoint: {endpoint_name}")

    # Capture to the session's default bucket instead of a hard-coded
    # account/region-specific bucket, so the step works in any account.
    # NOTE(review): the default bucket is normally named
    # sagemaker-<region>-<account-id>, i.e. the previously hard-coded bucket
    # in the original account - confirm no custom default bucket is configured.
    data_capture_config = DataCaptureConfig(
        enable_capture=True,
        sampling_percentage=100,
        destination_s3_uri=f"s3://{sagemaker_session.default_bucket()}/data-capture/",
        capture_options=["REQUEST", "RESPONSE"],
        csv_content_types=["text/csv"],
        json_content_types=["application/json"],
    )

    model.deploy(
        endpoint_name=endpoint_name,
        initial_instance_count=instance_count,
        instance_type=instance_type,
        container_startup_health_check_timeout=health_check_timeout,
        model_data_download_timeout=3600,
        data_capture_config=data_capture_config,
    )

    return endpoint_name
0 commit comments