Commit 47e04ff

Finished migrating to Qwen and Messages API
1 parent f4c06dd commit 47e04ff

File tree

4 files changed (+132, -142 lines)


workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/05.01_fine-tuning-pipeline.ipynb

Lines changed: 8 additions & 3 deletions
@@ -485,7 +485,7 @@
 "outputs": [],
 "source": [
 "from steps import pipeline_utils\n",
-"guardrail_id, guardrail_version = pipeline_utils.get_or_create_guardrail()"
+"guardrail_id, guardrail_version =pipeline_utils.get_or_create_guardrail()"
 ]
 },
 {
@@ -523,12 +523,10 @@
 ")\n",
 "run_id=training_step[0]\n",
 "model_artifacts_s3_path=training_step[2]\n",
-"# output_path=training_step[3]\n",
 "\n",
 "deploy_step = deploy_step.deploy(\n",
 "    tracking_server_arn=mlflow_tracking_server_arn,\n",
 "    model_artifacts_s3_path=model_artifacts_s3_path,\n",
-"    # output_path=output_path,\n",
 "    model_id=model_s3_destination,\n",
 "    experiment_name=pipeline_name,\n",
 "    run_id=run_id,\n",
@@ -742,6 +740,13 @@
 "    print(f\"Error during endpoint cleanup: {str(e)}\")\n",
 "    print(\"You may need to manually delete the endpoint from the SageMaker console\")"
 ]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": []
 }
 ],
 "metadata": {

workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/pipeline_utils.py

Lines changed: 53 additions & 72 deletions
@@ -8,22 +8,6 @@
 PIPELINE_INSTANCE_TYPE = "ml.m5.xlarge"
 
 
-# PROMPT_TEMPLATE = f"""
-# <|begin_of_text|>
-# <|start_header_id|>system<|end_header_id|>
-# You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
-# Below is an instruction that describes a task, paired with an input that provides further context.
-# Write a response that appropriately completes the request.
-# Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
-# <|eot_id|><|start_header_id|>user<|end_header_id|>
-# {{question}}<|eot_id|>
-# <|start_header_id|>assistant<|end_header_id|>
-# {{complex_cot}}
-
-# {{answer}}
-# <|eot_id|>
-# """
-
 SYSTEM_PROMPT = """You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
 Below is an instruction that describes a task, paired with an input that provides further context.
 Write a response that appropriately completes the request.
@@ -62,6 +46,7 @@ def convert_to_messages(sample, system_prompt=""):
 
     return sample
 
+
 def invoke_sagemaker_endpoint(payload, endpoint_name):
     """
     Invoke a SageMaker endpoint with the given payload.
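
Note: the body of convert_to_messages predates this commit and is not shown in the diff. For orientation, a minimal sketch of what a converter with this signature plausibly returns, assuming the Question field used by the evaluation step and OpenAI-style chat roles (the real implementation may also fold in the Complex_CoT and Response fields when preparing training data):

def convert_to_messages(sample, system_prompt=""):
    # Sketch only: attach an OpenAI-style message list to the sample in place.
    sample["messages"] = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": sample["Question"]},
    ]
    return sample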
@@ -90,69 +75,65 @@ def invoke_sagemaker_endpoint(payload, endpoint_name):
         return None, -1
 
 
-def create_guardrail(guardrail_client):
-    try:
-        guardrail = guardrail_client.create_guardrail(
-            name="ExampleMedicalGuardrail",
-            description='Example of a Guardrail for Medical Use Cases',
-            topicPolicyConfig={
-                'topicsConfig': [{
-                    'name': 'Block Pharmaceuticals',
-                    'definition': 'This model cannot recommend one pharmaceutical over another. Generic prescriptions consistent with medical expertise and clinical diagnoses only.',
-                    'type': 'DENY',
-                    'inputAction': 'BLOCK',
-                    'outputAction': 'BLOCK',
-                }]
-            },
-            sensitiveInformationPolicyConfig={
-                'piiEntitiesConfig': [
-                    {
-                        'type': 'UK_NATIONAL_HEALTH_SERVICE_NUMBER',
-                        'action': 'BLOCK',
-                        'inputAction': 'BLOCK',
-                        'outputAction': 'BLOCK'
-                    },
-                ]
-            },
-            contextualGroundingPolicyConfig={
-                'filtersConfig': [
-                    {
-                        'type': 'RELEVANCE',
-                        'threshold': 0.9,
-                        'action': 'BLOCK',
-                        'enabled': True
-                    },
-                ]
-            },
-            blockedInputMessaging="ExampleMedicalGuardrail has blocked this input.",
-            blockedOutputsMessaging="ExampleMedicalGuardrail has blocked this output."
-        )
-        guardrail_id = guardrail['guardrailId']
-        guardrail_version = guardrail['version']
-
-        print(f"Created new guardrail '{guardrail_id}:{guardrail_version}'")
-        return guardrail_id, guardrail_version
-    except botocore.exceptions.ClientError as create_error:
-        print(f"Error creating guardrail: {create_error}")
-
-
 def get_or_create_guardrail():
     guardrail_client = boto3.client('bedrock')
     guardrail_name = "ExampleMedicalGuardrail"
     try:
         # Try to get the guardrail
         response = guardrail_client.list_guardrails()
-        guardrail_id = ""
         for guardrail in response.get('guardrails', []):
             if guardrail['name'] == guardrail_name:
                 guardrail_id = guardrail['id']
-        if guardrail_id != "":
-            response = guardrail_client.get_guardrail(
-                guardrailIdentifier=guardrail_id
-            )
-            guardrail_version = response["version"]
-            print(f"Found Guardrail {guardrail_id}:{guardrail_version}")
-        else:
-            return create_guardrail(guardrail_client)
+        response = guardrail_client.get_guardrail(
+            guardrailIdentifier=guardrail_id
+        )
+        guardrail_version = response["version"]
+        print(f"Found Guardrail {guardrail_id}:{guardrail_version}")
     except botocore.exceptions.ClientError as e:
-        print(f"Error checking guardrail: {e}")
+        if e.response['Error']['Code'] == 'ResourceNotFoundException':
+            # Guardrail doesn't exist, create it
+            try:
+                guardrail = guardrail_client.create_guardrail(
+                    name="ExampleMedicalGuardrail",
+                    description='Example of a Guardrail for Medical Use Cases',
+                    topicPolicyConfig={
+                        'topicsConfig': [{
+                            'name': 'Block Pharmaceuticals',
+                            'definition': 'This model cannot recommend one pharmaceutical over another. Generic prescriptions consistent with medical expertise and clinical diagnoses only.',
+                            'type': 'DENY',
+                            'inputAction': 'BLOCK',
+                            'outputAction': 'BLOCK',
+                        }]
+                    },
+                    sensitiveInformationPolicyConfig={
+                        'piiEntitiesConfig': [
+                            {
+                                'type': 'UK_NATIONAL_HEALTH_SERVICE_NUMBER',
+                                'action': 'BLOCK',
+                                'inputAction': 'BLOCK',
+                                'outputAction': 'BLOCK'
+                            },
+                        ]
+                    },
+                    contextualGroundingPolicyConfig={
+                        'filtersConfig': [
+                            {
+                                'type': 'RELEVANCE',
+                                'threshold': 0.9,
+                                'action': 'BLOCK',
+                                'enabled': True
+                            },
+                        ]
+                    },
+                    blockedInputMessaging="ExampleMedicalGuardrail has blocked this input.",
+                    blockedOutputsMessaging="ExampleMedicalGuardrail has blocked this output."
+                )
+                guardrail_id = guardrail['guardrailId']
+                guardrail_version = guardrail['version']
+
+                print(f"Created new guardrail '{guardrail_id}:{guardrail_version}'")
+            except botocore.exceptions.ClientError as create_error:
+                print(f"Error creating guardrail: {create_error}")
+        else:
+            print(f"Error checking guardrail: {e}")
+    return guardrail_id, guardrail_version
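
The id/version pair returned here is what the notebook cell above unpacks, and it ultimately feeds Bedrock's ApplyGuardrail API. A minimal usage sketch, assuming default AWS credentials and an illustrative input string (neither is part of this commit):

import boto3

guardrail_id, guardrail_version = get_or_create_guardrail()
runtime = boto3.client("bedrock-runtime")

# Screen a candidate model input against the guardrail's topic and PII policies.
result = runtime.apply_guardrail(
    guardrailIdentifier=guardrail_id,
    guardrailVersion=guardrail_version,
    source="INPUT",
    content=[{"text": {"text": "Which brand-name statin should I take?"}}],
)
print(result["action"])  # "GUARDRAIL_INTERVENED" if blocked, "NONE" otherwise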

workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/qualitative_eval_step.py

Lines changed: 39 additions & 36 deletions
@@ -4,6 +4,8 @@
 
 from sagemaker.workflow.function_step import step
 from .pipeline_utils import PIPELINE_INSTANCE_TYPE
+from .pipeline_utils import SYSTEM_PROMPT
+from .pipeline_utils import convert_to_messages
 
 
 @step(
@@ -54,7 +56,7 @@ def invoke_sagemaker_endpoint(payload, endpoint_name):
            guardrailIdentifier=guardrail_id,
            guardrailVersion=guardrail_version,
            source='INPUT',
-           content=[{'text': {'text': payload["inputs"]}}]
+           content=[{'text': {'text': payload["messages"][0]["content"]}}]
        )
        guardrailResult = guardrail_response_input["action"]
 
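Since the payload now carries a message list instead of a raw "inputs" string, the guardrail screens the text of the first message. A two-line sketch of the assumption behind that index (per the converter sketch above, position 0 may be the system prompt rather than the user turn, depending on how convert_to_messages orders roles):

payload = {"messages": [{"role": "user", "content": "Patient presents with chest pain..."}]}
screened_text = payload["messages"][0]["content"]  # the text passed to ApplyGuardrail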
@@ -64,15 +66,29 @@ def invoke_sagemaker_endpoint(payload, endpoint_name):
 
     try:
         start_time = time.time()
-        response = sm_client.invoke_endpoint(
-            EndpointName=endpoint_name,
-            ContentType='application/json',
-            Body=json.dumps(payload)
+        # response = sm_client.invoke_endpoint(
+        #     EndpointName=endpoint_name,
+        #     ContentType='application/json',
+        #     Body=json.dumps(payload)
+        # )
+        # inference_time = time.time() - start_time
+
+        # response_body = response['Body'].read().decode('utf-8')
+        # return json.loads(response_body), inference_time
+
+        from sagemaker.predictor import Predictor
+        from sagemaker.serializers import JSONSerializer
+        from sagemaker.deserializers import JSONDeserializer
+
+        predictor = Predictor(
+            endpoint_name=f"{endpoint_name}",
+            serializer=JSONSerializer(),
+            deserializer=JSONDeserializer()
         )
-        inference_time = time.time() - start_time
 
-        response_body = response['Body'].read().decode('utf-8')
-        return json.loads(response_body), inference_time
+        response = predictor.predict(payload)['choices'][0]['message']['content']
+        inference_time = time.time() - start_time
+        return response, inference_time
     except Exception as e:
         print(f"Error invoking endpoint {endpoint_name}: {str(e)}")
         return None, -1
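
For reference, the new call pattern in isolation. The endpoint name and messages are placeholders, and the ['choices'][0]['message']['content'] indexing assumes the serving container returns an OpenAI-style chat completion, as LMI/TGI-style Messages API containers do:

from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Hypothetical endpoint name, for illustration only.
predictor = Predictor(
    endpoint_name="qwen-finetuned-endpoint",
    serializer=JSONSerializer(),      # dict -> JSON request body
    deserializer=JSONDeserializer(),  # JSON response body -> dict
)

payload = {
    "messages": [
        {"role": "system", "content": "You are a medical expert."},
        {"role": "user", "content": "Outline a differential for acute chest pain."},
    ],
    "parameters": {"max_new_tokens": 512, "top_p": 0.9, "temperature": 0.6},
}

completion = predictor.predict(payload)
print(completion["choices"][0]["message"]["content"])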
@@ -295,44 +311,31 @@ def evaluate_model_qualitatively(model_config, dataset):
         question = example["Question"]
         reference = "\n".join([example["Complex_CoT"], example["Response"]])
 
-        # Prepare the prompt for the model
-        prompt = f"""
-        <|begin_of_text|>
-        <|start_header_id|>system<|end_header_id|>
-        You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
-        Below is an instruction that describes a task, paired with an input that provides further context.
-        Write a response that appropriately completes the request.
-        Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
-        <|eot_id|><|start_header_id|>user<|end_header_id|>
-        {question}<|eot_id|>
-        <|start_header_id|>assistant<|end_header_id|>"""
-
-        # Payload for SageMaker endpoint
-        payload = {
-            "inputs": prompt,
-            "parameters": {
-                "max_new_tokens": 512,
-                "top_p": 0.9,
-                "temperature": 0.6,
-                "return_full_text": False
-            }
+        payload = {}
+        messages_prompt = convert_to_messages(example, SYSTEM_PROMPT)
+        payload["messages"] = messages_prompt["messages"]
+        payload["parameters"] = {
+            "max_new_tokens": 512,
+            "top_p": 0.9,
+            "temperature": 0.6,
+            "return_full_text": False
         }
-
+
         # Call the model endpoint
         try:
             response, inference_time = invoke_sagemaker_endpoint(payload, endpoint_name)
 
             if response is None:
                 prediction = "Error generating response."
                 failed_generations += 1
-            elif isinstance(response, list):
-                prediction = response[0].get('generated_text', '').strip()
-            elif isinstance(response, dict):
-                prediction = response.get('generated_text', '').strip()
+            # elif isinstance(response, list):
+            #     prediction = response[0].get('generated_text', '').strip()
+            # elif isinstance(response, dict):
+            #     prediction = response.get('generated_text', '').strip()
             else:
                 prediction = str(response).strip()
 
-            prediction = prediction.split("<|eot_id|>")[0] if "<|eot_id|>" in prediction else prediction
+            # prediction = prediction.split("<|eot_id|>")[0] if "<|eot_id|>" in prediction else prediction
             inference_times.append(inference_time)
 
         except Exception as e:
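
Under the new scheme the commented-out list/dict branches are dead code: invoke_sagemaker_endpoint already unwraps the completion to a plain string, so only the str(response).strip() fallback fires, and no <|eot_id|> trimming is needed. For clarity, a sketch of the assumed OpenAI-compatible shape being unwrapped upstream:

# Assumed response schema, matching the ['choices'][0]['message']['content'] indexing above.
response = {
    "choices": [
        {"message": {"role": "assistant", "content": "Step 1: consider the history..."}}
    ]
}
prediction = response["choices"][0]["message"]["content"].strip()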
@@ -478,7 +481,7 @@ def evaluate_model_qualitatively(model_config, dataset):
     mlflow.log_param("qualitative_evaluation_endpoint", endpoint_name)
     mlflow.log_param("qualitative_evaluation_num_samples", num_samples)
     mlflow.log_param("qualitative_evaluation_timestamp", datetime.now().isoformat())
-    mlflow.log_param("llm_judge_model", "bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0")
+    mlflow.log_param("llm_judge_model", "bedrock:/anthropic.claude-3-haiku-20240307-v1:0")
 
     # Load the test dataset
     try:
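
The judge URI drops the us. cross-region inference-profile prefix in favor of the base Bedrock model ID. The diff only shows the logged parameter; if the step builds its judge with MLflow's GenAI metric helpers, the URI would be consumed roughly as below (the metric choice is an assumption, not from the commit):

from mlflow.metrics.genai import answer_similarity

# Hypothetical metric construction; only the model URI comes from the commit.
judge_metric = answer_similarity(
    model="bedrock:/anthropic.claude-3-haiku-20240307-v1:0"
)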

0 commit comments
