
Commit a8c1207

Modernized the fine-tuning job

1 parent 150fa35 commit a8c1207

File tree

8 files changed: +475 −500 lines

workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/05.00_fmops_examples.ipynb

Lines changed: 48 additions & 76 deletions
@@ -102,7 +102,8 @@
 "outputs": [],
 "source": [
 "sagemaker_session = sagemaker.session.Session()\n",
-"role = sagemaker.get_execution_role()"
+"role = sagemaker.get_execution_role()\n",
+"region = sagemaker_session.boto_session.region_name"
 ]
 },
 {
@@ -114,7 +115,7 @@
 "\n",
 "We define appropriate paths in S3 to store model files, define the model we will be working with, and define the model endpoint name.\n",
 "\n",
-"In this lab, we are working with [DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B). It is easy to fine-tune as we will see in the next lab, and is small enough to fit on a reasonably sized GPU-accelerated hosting endpoint."
+"In this lab, we are working with [Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507). It is easy to fine-tune, as we will see in the next lab, and is small enough to fit on a reasonably sized GPU-accelerated hosting endpoint."
 ]
 },
 {
@@ -138,12 +139,14 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"model_id = \"deepseek-ai/DeepSeek-R1-Distill-Llama-8B\"\n",
+"model_id = \"Qwen/Qwen3-4B-Instruct-2507\"\n",
 "model_id_filesafe = model_id.replace(\"/\",\"_\").replace(\".\", \"_\")\n",
 "model_name_safe = model_id.split('/')[-1].replace('.', '-').replace('_', '-')\n",
 "endpoint_name = f\"Example-{model_name_safe}\"\n",
 "instance_count = 1\n",
-"instance_type = \"ml.g5.2xlarge\""
+"instance_type = \"ml.g5.2xlarge\"\n",
+"health_check_timeout = 1800\n",
+"data_download_timeout = 3600"
 ]
 },
 {
@@ -170,15 +173,14 @@
 "source": [
 "mlflow_tracking_server_arn = \"<REPLACE WITH YOUR ARN>\"\n",
 "\n",
-"if not mlflow_tracking_server_arn:\n",
-"    try:\n",
-"        response = boto3.client('sagemaker').describe_mlflow_tracking_server(\n",
-"            TrackingServerName='genai-mlflow-tracker'\n",
-"        )\n",
-"        mlflow_tracking_server_arn = response['TrackingServerArn']\n",
-"        print(f\"MLflow Tracking Server ARN: {mlflow_tracking_server_arn}\")\n",
-"    except botocore.exceptions.ClientError:\n",
-"        print(\"No MLflow Tracking Server Found, please input a value for mlflow_tracking_server_arn\")\n",
+"try:\n",
+"    response = boto3.client('sagemaker').describe_mlflow_tracking_server(\n",
+"        TrackingServerName='genai-mlflow-tracker'\n",
+"    )\n",
+"    mlflow_tracking_server_arn = response['TrackingServerArn']\n",
+"    print(f\"MLflow Tracking Server ARN: {mlflow_tracking_server_arn}\")\n",
+"except botocore.exceptions.ClientError:\n",
+"    print(\"No MLflow Tracking Server Found, please input a value for mlflow_tracking_server_arn\")\n",
 "\n",
 "os.environ[\"mlflow_tracking_server_arn\"] = mlflow_tracking_server_arn"
 ]
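Note: once the tracking server ARN is resolved, the notebook points MLflow at it. For orientation, a minimal sketch of that wiring, assuming the sagemaker-mlflow plugin from the requirements file (which lets mlflow.set_tracking_uri accept a SageMaker tracking server ARN directly); the experiment name is illustrative:

import mlflow

# Assumes the sagemaker-mlflow plugin is installed; it lets MLflow treat a
# SageMaker tracking server ARN as a tracking URI.
mlflow.set_tracking_uri(mlflow_tracking_server_arn)
mlflow.set_experiment("fmops-examples")  # illustrative experiment name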
@@ -188,7 +190,7 @@
 "metadata": {},
 "source": [
 "### 4. Model Deployment\n",
-"There are several approaches to deploying a model to a SageMaker AI managed endpoint. In this section, we explore the most direct option which downloads a model directly from HuggingFace to the managed endpoint via SageMaker JumpStart. We are still using DeepSeek-R1-Distill-Llama-8B, but we have not fine-tuned it. The purpose of this section is to illustrate the components required to customize a model deployment on SageMaker before fine-tuning it."
+"There are several approaches to deploying a model to a SageMaker AI managed endpoint. In this section, we explore the most direct option, which downloads a model directly from HuggingFace to the managed endpoint via SageMaker JumpStart. We are still using Qwen3-4B-Instruct-2507, but we have not fine-tuned it. The purpose of this section is to illustrate the components required to customize a model deployment on SageMaker before fine-tuning it."
 ]
 },
 {
@@ -207,12 +209,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# Create and deploy model\n",
-"image_uri = sagemaker.image_uris.retrieve(\n",
-"    framework=\"djl-lmi\",\n",
-"    region=sagemaker_session.boto_session.region_name,\n",
-"    version=\"latest\"\n",
-")"
+"inference_image_uri = f\"763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.33.0-lmi15.0.0-cu128\"\n",
+"print(f\"using image to host: {inference_image_uri}\")"
 ]
 },
 {
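Note: the commit replaces the SDK lookup with a pinned DJL-LMI image URI. A hedged sketch of the alternative, resolving the image through the SDK while pinning a version instead of "latest"; whether the 0.33.0 version string is registered in a given sagemaker release is an assumption to verify:

import sagemaker

# Sketch: resolve the DJL-LMI container for the current region via the SDK.
# The version value is an assumption; availability depends on the installed
# sagemaker SDK's image-URI config.
resolved_uri = sagemaker.image_uris.retrieve(
    framework="djl-lmi",
    region=region,  # assumes `region` from the setup cell above
    version="0.33.0",
)
print(resolved_uri)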
@@ -242,7 +240,7 @@
 "    'OPTION_MAX_MODEL_LEN': '4096'\n",
 "}\n",
 "model = HuggingFaceModel(\n",
-"    image_uri=image_uri,\n",
+"    image_uri=inference_image_uri,\n",
 "    env=model_config,\n",
 "    role=role\n",
 ")"
@@ -276,8 +274,6 @@
 "with mlflow.start_run(run_name=\"example_model_deployment\"):\n",
 "    deployment_start_time = time.time()\n",
 "\n",
-"    health_check_timeout = 1800\n",
-"    data_download_timeout = 3600\n",
 "\n",
 "    # Log deployment parameters\n",
 "    mlflow.log_params({\n",
@@ -297,7 +293,7 @@
 "        instance_type=instance_type,\n",
 "        container_startup_health_check_timeout=health_check_timeout,\n",
 "        model_data_download_timeout=data_download_timeout,\n",
-"        endpoint_name=endpoint_name\n",
+"        endpoint_name=f\"{endpoint_name}\"\n",
 "    )\n",
 "\n",
 "    # Log deployment metrics\n",
@@ -339,7 +335,7 @@
 "from sagemaker.deserializers import JSONDeserializer\n",
 "\n",
 "predictor = Predictor(\n",
-"    endpoint_name=endpoint_name,\n",
+"    endpoint_name=f\"{endpoint_name}\",\n",
 "    serializer=JSONSerializer(),\n",
 "    deserializer=JSONDeserializer()\n",
 ")\n",
@@ -436,7 +432,7 @@
 "metadata": {},
 "source": [
 "### 5. Qualitative Model Evaluation\n",
-"Let's test the default DeepSeek-R1-Distill-Llama-8B using MLFlow's LLM-as-a-Judge capability. We'll use [Anthropic's Claude 3 Haiku](https://www.anthropic.com/news/claude-3-haiku) model on [Amazon Bedrock](https://aws.amazon.com/bedrock/) as the judge. We'll also wrap our model endpoint invocation in a method making it easier to call in the evaluation. \n",
+"Let's test the default Qwen3-4B-Instruct-2507 using MLflow's LLM-as-a-Judge capability. We'll use [Anthropic's Claude 3 Haiku](https://www.anthropic.com/news/claude-3-haiku) model on [Amazon Bedrock](https://aws.amazon.com/bedrock/) as the judge. We'll also wrap our model endpoint invocation in a method, making it easier to call during the evaluation.\n",
 "\n",
 "This particular endpoint is the [cross-region inference endpoint](https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference.html) name for Claude 3 Haiku.\n",
 "\n",
@@ -498,7 +494,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Now use Managed MLFlow 3.0 on Amazon SageMaker AI's `EvaluationExample` object to provide examples of good and bad model responses. This synthetic data will be used to evaluate our Example DeepSeek-R1_Distill_Llama-8B along several qualitative metrics. We create these qualitative metrics using `make_genai_metric`."
+"Now use Managed MLflow 3.0 on Amazon SageMaker AI's `EvaluationExample` object to provide examples of good and bad model responses. This synthetic data will be used to evaluate our example Qwen3-4B-Instruct-2507 deployment against several qualitative metrics. We create these qualitative metrics using `make_genai_metric`."
 ]
 },
 {
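Note: for readers unfamiliar with these MLflow objects, a minimal sketch of the pattern follows; the example content and grading prompt are illustrative, and the Bedrock judge URI is an assumption about how the notebook routes to Claude 3 Haiku:

from mlflow.metrics.genai import EvaluationExample, make_genai_metric

# Illustrative calibration example for the judge.
good_example = EvaluationExample(
    input="What would cystometry most likely reveal in stress urinary incontinence?",
    output="A normal post-void residual volume and no involuntary detrusor contractions.",
    score=5,
    justification="Accurate, complete, and directly answers the question.",
)

clinical_accuracy = make_genai_metric(
    name="clinical_accuracy",
    definition="Degree to which the answer is medically accurate and complete.",
    grading_prompt="Rate 1-5, where 5 is fully accurate and complete.",
    examples=[good_example],
    model="bedrock:/us.anthropic.claude-3-haiku-20240307-v1:0",  # assumed judge URI
    greater_is_better=True,
)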
@@ -914,7 +910,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"In the next workshop we fine-tune DeepSeek-R1-Distill-Llama-8B to become a medical expert. To accomplish this, we execute a fine-tuning job using Managed MLflow on SageMaker AI. We get our data from the [FreedomIntelligence/medical-o1-reasoning-SFT](https://huggingface.co/datasets/FreedomIntelligence/medical-o1-reasoning-SFT) dataset, available on HuggingFace.\n",
+"In the next workshop, we fine-tune Qwen3-4B-Instruct-2507 to become a medical expert. To accomplish this, we execute a fine-tuning job using Managed MLflow on SageMaker AI. We get our data from the [FreedomIntelligence/medical-o1-reasoning-SFT](https://huggingface.co/datasets/FreedomIntelligence/medical-o1-reasoning-SFT) dataset, available on HuggingFace.\n",
 "\n",
 "In this lab, we show a small example of what fine-tuning looks like for a single record of the dataset."
 ]
@@ -931,56 +927,25 @@
 "    \"Response\": \"Cystometry in this case of stress urinary incontinence would most likely reveal a normal post-void residual volume, as stress incontinence typically does not involve issues with bladder emptying. Additionally, since stress urinary incontinence is primarily related to physical exertion and not an overactive bladder, you would not expect to see any involuntary detrusor contractions during the test.\"\n",
 "}\n",
 "\n",
+"SYSTEM_PROMPT = \"\"\"You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. \n",
+"Below is an instruction that describes a task, paired with an input that provides further context. \n",
+"Write a response that appropriately completes the request.\n",
+"Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.\"\"\"\n",
 "\n",
-"PROMPT_TEMPLATE = \"\"\"\n",
-"<|begin_of_text|>\n",
-"    <|start_header_id|>system<|end_header_id|>\n",
-"    You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. \n",
-"    Below is an instruction that describes a task, paired with an input that provides further context. \n",
-"    Write a response that appropriately completes the request.\n",
-"    Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.\n",
-"    <|eot_id|>\n",
-"    <|start_header_id|>user<|end_header_id|>\n",
-"    {{question}}\n",
-"    <|eot_id|>\n",
-"    <|start_header_id|>assistant<|end_header_id|>\n",
-"    {{complex_cot}}\n",
-"    {{answer}}\n",
-"<|eot_id|>\n",
-"\"\"\"\n",
+"def convert_to_messages(sample, system_prompt=SYSTEM_PROMPT):\n",
+"\n",
+"    messages = [\n",
+"        {\"role\": \"system\", \"content\": system_prompt},\n",
+"        {\"role\": \"user\", \"content\": sample[\"Question\"]},\n",
+"        {\"role\": \"assistant\", \"content\": f\"{sample['Complex_CoT']}\\n\\n{sample['Response']}\"}\n",
+"    ]\n",
+"\n",
+"    sample[\"messages\"] = messages\n",
+"\n",
+"    return sample\n",
 "\n",
-"# Template dataset to add prompt to each sample\n",
-"def template_dataset(sample):\n",
-"    try:\n",
-"        sample[\"text\"] = PROMPT_TEMPLATE.format(question=sample[\"Question\"],\n",
-"                                                complex_cot=sample[\"Complex_CoT\"],\n",
-"                                                answer=sample[\"Response\"])\n",
-"        return sample\n",
-"    except KeyError as e:\n",
-"        print(f\"KeyError in template_dataset: {str(e)}\")\n",
-"        # Provide default values for missing fields\n",
-"        missing_key = str(e).strip(\"'\")\n",
-"        if missing_key == \"Question\":\n",
-"            sample[\"text\"] = PROMPT_TEMPLATE.format(\n",
-"                question=\"[Missing question]\",\n",
-"                complex_cot=sample.get(\"Complex_CoT\", \"[Missing CoT]\"),\n",
-"                answer=sample.get(\"Response\", \"[Missing response]\")\n",
-"            )\n",
-"        elif missing_key == \"Complex_CoT\":\n",
-"            sample[\"text\"] = PROMPT_TEMPLATE.format(\n",
-"                question=sample[\"Question\"],\n",
-"                complex_cot=\"[Missing CoT]\",\n",
-"                answer=sample.get(\"Response\", \"[Missing response]\")\n",
-"            )\n",
-"        elif missing_key == \"Response\":\n",
-"            sample[\"text\"] = PROMPT_TEMPLATE.format(\n",
-"                question=sample[\"Question\"],\n",
-"                complex_cot=sample.get(\"Complex_CoT\", \"[Missing CoT]\"),\n",
-"                answer=\"[Missing response]\"\n",
-"            )\n",
-"        return sample\n",
 "\n",
-"PROCESSED_SAMPLE = template_dataset(FINE_TUNING_DATA_SAMPLE)\n",
+"PROCESSED_SAMPLE = convert_to_messages(FINE_TUNING_DATA_SAMPLE)\n",
 "print(PROCESSED_SAMPLE)"
 ]
 },
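Note: the switch from a hand-rolled Llama 3 prompt template to a plain messages list works because Qwen3 ships its own chat template; a sketch of rendering the processed sample with it (assumes transformers is installed and the Hub is reachable):

from transformers import AutoTokenizer

# Render the messages with Qwen3's own chat template to see the exact training
# text; this replaces the hand-maintained Llama-style special tokens above.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B-Instruct-2507")
rendered = tokenizer.apply_chat_template(PROCESSED_SAMPLE["messages"], tokenize=False)
print(rendered)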
@@ -1097,8 +1062,15 @@
 "4. Creating and applying Guardrails to our model\n",
 "5. Tracing model calls using MLFlow tracing\n",
 "\n",
-"Next, we show how to actually perform fine-tuning on this DeepSeek model to improve the model's performance in this domain. Moreover, we'll orchestrate all of these steps into a fine-tuning pipeline powered by Managed MLFlow and SageMaker AI Pipelines."
+"Next, we show how to actually perform fine-tuning on this Qwen3 model to improve its performance in this domain. Moreover, we'll orchestrate all of these steps into a fine-tuning pipeline powered by Managed MLflow and SageMaker AI Pipelines."
 ]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": []
 }
 ],
 "metadata": {

workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/05.01_fine-tuning-pipeline.ipynb

Lines changed: 18 additions & 17 deletions
@@ -114,15 +114,15 @@
 "sagemaker_session = sagemaker.session.Session()\n",
 "role = sagemaker.get_execution_role()\n",
 "instance_type = \"ml.m5.xlarge\"\n",
-"pipeline_name = \"AIM405-deepseek-finetune-pipeline\"\n",
+"pipeline_name = \"AIM405-qwen3-finetune-pipeline\"\n",
 "bucket_name = sagemaker_session.default_bucket()\n",
 "default_prefix = sagemaker_session.default_bucket_prefix\n",
 "if default_prefix:\n",
 "    input_path = f'{default_prefix}/datasets/llm-fine-tuning-modeltrainer-sft'\n",
 "else:\n",
 "    input_path = f'datasets/llm-fine-tuning-modeltrainer-sft'\n",
 "\n",
-"model_id = \"deepseek-ai/DeepSeek-R1-Distill-Llama-8B\"\n",
+"model_id = \"Qwen/Qwen3-4B-Instruct-2507\"\n",
 "model_id_filesafe = model_id.replace(\"/\",\"_\").replace(\".\", \"_\")"
 ]
 },
@@ -157,15 +157,14 @@
 "source": [
 "mlflow_tracking_server_arn = \"<REPLACE WITH YOUR ARN>\"\n",
 "\n",
-"if not mlflow_tracking_server_arn:\n",
-"    try:\n",
-"        response = boto3.client('sagemaker').describe_mlflow_tracking_server(\n",
-"            TrackingServerName='genai-mlflow-tracker'\n",
-"        )\n",
-"        mlflow_tracking_server_arn = response['TrackingServerArn']\n",
-"        print(f\"MLflow Tracking Server ARN: {mlflow_tracking_server_arn}\")\n",
-"    except ClientError:\n",
-"        print(\"No MLflow Tracking Server Found, please input a value for mlflow_tracking_server_arn\")\n",
+"try:\n",
+"    response = boto3.client('sagemaker').describe_mlflow_tracking_server(\n",
+"        TrackingServerName='genai-mlflow-tracker'\n",
+"    )\n",
+"    mlflow_tracking_server_arn = response['TrackingServerArn']\n",
+"    print(f\"MLflow Tracking Server ARN: {mlflow_tracking_server_arn}\")\n",
+"except ClientError:\n",
+"    print(\"No MLflow Tracking Server Found, please input a value for mlflow_tracking_server_arn\")\n",
 "\n",
 "os.environ[\"mlflow_tracking_server_arn\"] = mlflow_tracking_server_arn\n",
 "os.environ[\"pipeline_name\"] = pipeline_name"
@@ -520,16 +519,19 @@
 "    test_dataset_s3_path=preprocessing_step[2],\n",
 "    train_config_s3_path=train_config_s3_path,\n",
 "    role=role,\n",
-"    model_id=model_s3_destination,\n",
+"    model_id=model_s3_destination\n",
 ")\n",
 "run_id=training_step[0]\n",
 "model_artifacts_s3_path=training_step[2]\n",
-"output_path=training_step[3]\n",
+"# output_path=training_step[3]\n",
 "\n",
 "deploy_step = deploy_step.deploy(\n",
+"    tracking_server_arn=mlflow_tracking_server_arn,\n",
 "    model_artifacts_s3_path=model_artifacts_s3_path,\n",
-"    output_path=output_path,\n",
+"    # output_path=output_path,\n",
 "    model_id=model_s3_destination,\n",
+"    experiment_name=pipeline_name,\n",
+"    run_id=run_id,\n",
 ")\n",
 "endpoint_name=deploy_step\n",
 "\n",
@@ -574,7 +576,7 @@
 "    run_id=run_id, # Assuming training_step returns run_id as first output\n",
 "    model_artifacts_s3_path=model_artifacts_s3_path, # Assuming training_step returns artifacts path as second output\n",
 "    model_id=model_id,\n",
-"    model_name=f\"Fine-Tuned-Medical-DeepSeek\",\n",
+"    model_name=\"Fine-Tuned-Medical-Qwen3-4B-Instruct-2507\",\n",
 "    endpoint_name=endpoint_name,\n",
 "    evaluation_score=quantitative_eval_step[\"rougeL_f\"], # Get the evaluation score\n",
 "    pipeline_name=pipeline_name,\n",
@@ -728,8 +730,7 @@
 "\n",
 "# Clean up endpoint\n",
 "try:\n",
-"    model_name_safe = model_id.split('/')[-1].replace('.', '-').replace('_', '-')\n",
-"    endpoint_name = f\"{model_name_safe}-sft-djl\"\n",
+"    endpoint_name = f\"{model_id.replace('/', '-').replace('_', '-')}-sft-djl\"\n",
 "    \n",
 "    print(f\"Cleaning up endpoint: {endpoint_name}\")\n",
 "    if delete_endpoint_with_retry(endpoint_name):\n",
Lines changed: 10 additions & 11 deletions
@@ -1,21 +1,20 @@
-awscli==1.42.25
-transformers==4.50.2
+transformers==4.52.2
 peft==0.14.0
 accelerate==1.3.0
 bitsandbytes==0.45.1
-datasets==3.5.0
+datasets==3.2.0
 evaluate==0.4.3
 huggingface_hub[hf_transfer]==0.33.4
-mlflow
+mlflow==2.22.2
 safetensors>=0.5.2
-sagemaker==2.244.0
+sagemaker==2.252.0
 sagemaker-mlflow==0.1.0
 sentencepiece==0.2.0
 scikit-learn==1.6.1
 tokenizers>=0.21.0
-trl==0.9.6
-psutil
-py7zr
-pynvml
-xtarfile
-rouge-score
+trl==0.18.0
+psutil==7.1.0
+py7zr==1.0.0
+pynvml==13.0.1
+xtarfile==0.2.1
+rouge-score==0.1.2
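Note: the trl bump from 0.9.6 to 0.18.0 is what makes the messages-format data usable directly: recent TRL releases let SFTTrainer consume a dataset with a messages column and apply the tokenizer's chat template itself. A minimal sketch under those assumptions (model ID and step count illustrative):

from datasets import Dataset
from trl import SFTConfig, SFTTrainer

# Tiny illustrative dataset in the "messages" format produced by
# convert_to_messages in the examples notebook.
train_ds = Dataset.from_list([{
    "messages": [
        {"role": "system", "content": "You are a medical expert."},
        {"role": "user", "content": "What causes stress urinary incontinence?"},
        {"role": "assistant", "content": "Pelvic floor and urethral sphincter weakness."},
    ]
}])

trainer = SFTTrainer(
    model="Qwen/Qwen3-4B-Instruct-2507",  # TRL loads the model from the Hub
    train_dataset=train_ds,               # chat template applied automatically
    args=SFTConfig(output_dir="/tmp/sft-demo", max_steps=1),
)
trainer.train()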
