|
325 | 325 | "metadata": {}, |
326 | 326 | "outputs": [], |
327 | 327 | "source": [ |
328 | | - "eval_job, status = (\n", |
329 | | - " run_evaluation_pipeline( # Function which invokes the model evaluation pipeline.\n", |
330 | | - " ml_client=ml_client,\n", |
331 | | - " registry_ml_client=registry_ml_client,\n", |
332 | | - " compute_cluster=\"k8s-a100-compute\",\n", |
333 | | - " grpo_model_dir=grpo_registered_model.path, # Output from GPRO RL provided as data asset created from earlier step.\n", |
334 | | - " rlpp_model_dir=rlpp_registered_model.path, # Output from Reinforce_plus_plus RL provided as data asset created from earlier step.\n", |
335 | | - " validation_dataset_path=test_data_path, # Path to test dataset\n", |
336 | | - " run_config={\n", |
337 | | - " \"num_nodes\": 1, # Number of nodes to be used for evaluation run.\n", |
338 | | - " \"number_of_gpu_to_use\": 8, # Number of GPUs in a node to be used for evaluation run.\n", |
339 | | - " \"base_path_1_label\": \"GRPO\", # Label to identify GRPO model outputs.\n", |
340 | | - " \"base_path_2_label\": \"RLPP\", # Label to identify RLPP model outputs.\n", |
341 | | - " \"explore_pattern_1\": \"global_step_{checkpoint}/actor/lora_adapter/\",\n", |
342 | | - " \"explore_pattern_2\": \"global_step_{checkpoint}/actor/lora_adapter/\",\n", |
343 | | - " \"checkpoint_values_1\": \"12\",\n", |
344 | | - " \"checkpoint_values_2\": \"12\",\n", |
345 | | - " \"use_lora_adapters_1\": True,\n", |
346 | | - " \"use_lora_adapters_2\": True,\n", |
347 | | - " \"evaluate_base_model\": True, # Set to True to evaluate base model along with RL finetuned models.\n", |
348 | | - " \"hf_model_id\": \"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\", # Huggingface ID of the base model\n", |
349 | | - " \"max_prompt_length\": 8196,\n", |
350 | | - " \"max_response_length\": 1024,\n", |
351 | | - " \"dtype\": \"bfloat16\",\n", |
352 | | - " \"tensor_parallel_size\": 4,\n", |
353 | | - " }, # Configuration parameters for evaluation run.\n", |
354 | | - " )\n", |
| 328 | + "# Function which invokes the model evaluation pipeline.\n", |
| 329 | + "eval_job, status = run_evaluation_pipeline(\n", |
| 330 | + " ml_client=ml_client,\n", |
| 331 | + " registry_ml_client=registry_ml_client,\n", |
| 332 | + " compute_cluster=\"k8s-a100-compute\",\n", |
| 333 | + "    grpo_model_dir=grpo_registered_model.path,  # Output from GRPO RL provided as data asset created from earlier step.\n", |
| 334 | + " rlpp_model_dir=rlpp_registered_model.path, # Output from Reinforce_plus_plus RL provided as data asset created from earlier step.\n", |
| 335 | + " validation_dataset_path=test_data_path, # Path to test dataset\n", |
| 336 | + " run_config={\n", |
| 337 | + " \"num_nodes\": 1, # Number of nodes to be used for evaluation run.\n", |
| 338 | + " \"number_of_gpu_to_use\": 8, # Number of GPUs in a node to be used for evaluation run.\n", |
| 339 | + " \"base_path_1_label\": \"GRPO\", # Label to identify GRPO model outputs.\n", |
| 340 | + " \"base_path_2_label\": \"RLPP\", # Label to identify RLPP model outputs.\n", |
| 341 | + " \"explore_pattern_1\": \"global_step_{checkpoint}/actor/lora_adapter/\",\n", |
| 342 | + " \"explore_pattern_2\": \"global_step_{checkpoint}/actor/lora_adapter/\",\n", |
| 343 | + " \"checkpoint_values_1\": \"12\",\n", |
| 344 | + " \"checkpoint_values_2\": \"12\",\n", |
| 345 | + " \"use_lora_adapters_1\": True,\n", |
| 346 | + " \"use_lora_adapters_2\": True,\n", |
| 347 | + " \"evaluate_base_model\": True, # Set to True to evaluate base model along with RL finetuned models.\n", |
| 348 | + " \"hf_model_id\": \"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\", # Huggingface ID of the base model\n", |
| 349 | + " \"max_prompt_length\": 8196,\n", |
| 350 | + " \"max_response_length\": 1024,\n", |
| 351 | + " \"dtype\": \"bfloat16\",\n", |
| 352 | + " \"tensor_parallel_size\": 4,\n", |
| 353 | + " }, # Configuration parameters for evaluation run.\n", |
355 | 354 | ")" |
356 | 355 | ] |
357 | 356 | }, |
|
0 commit comments