Skip to content

Commit fef5a52

Browse files
committed
black fmt
1 parent e87c321 commit fef5a52

File tree

1 file changed

+26
-27
lines changed

1 file changed

+26
-27
lines changed

sdk/python/foundation-models/system/reinforcement-learning/reinforcement-learning.ipynb

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -325,33 +325,32 @@
325325
"metadata": {},
326326
"outputs": [],
327327
"source": [
328-
"eval_job, status = (\n",
329-
" run_evaluation_pipeline( # Function which invokes the model evaluation pipeline.\n",
330-
" ml_client=ml_client,\n",
331-
" registry_ml_client=registry_ml_client,\n",
332-
" compute_cluster=\"k8s-a100-compute\",\n",
333-
"        grpo_model_dir=grpo_registered_model.path,  # Output from GRPO RL provided as data asset created from earlier step.\n",
334-
" rlpp_model_dir=rlpp_registered_model.path, # Output from Reinforce_plus_plus RL provided as data asset created from earlier step.\n",
335-
" validation_dataset_path=test_data_path, # Path to test dataset\n",
336-
" run_config={\n",
337-
" \"num_nodes\": 1, # Number of nodes to be used for evaluation run.\n",
338-
" \"number_of_gpu_to_use\": 8, # Number of GPUs in a node to be used for evaluation run.\n",
339-
" \"base_path_1_label\": \"GRPO\", # Label to identify GRPO model outputs.\n",
340-
" \"base_path_2_label\": \"RLPP\", # Label to identify RLPP model outputs.\n",
341-
" \"explore_pattern_1\": \"global_step_{checkpoint}/actor/lora_adapter/\",\n",
342-
" \"explore_pattern_2\": \"global_step_{checkpoint}/actor/lora_adapter/\",\n",
343-
" \"checkpoint_values_1\": \"12\",\n",
344-
" \"checkpoint_values_2\": \"12\",\n",
345-
" \"use_lora_adapters_1\": True,\n",
346-
" \"use_lora_adapters_2\": True,\n",
347-
" \"evaluate_base_model\": True, # Set to True to evaluate base model along with RL finetuned models.\n",
348-
" \"hf_model_id\": \"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\", # Huggingface ID of the base model\n",
349-
" \"max_prompt_length\": 8196,\n",
350-
" \"max_response_length\": 1024,\n",
351-
" \"dtype\": \"bfloat16\",\n",
352-
" \"tensor_parallel_size\": 4,\n",
353-
" }, # Configuration parameters for evaluation run.\n",
354-
" )\n",
328+
"# Function which invokes the model evaluation pipeline.\n",
329+
"eval_job, status = run_evaluation_pipeline(\n",
330+
" ml_client=ml_client,\n",
331+
" registry_ml_client=registry_ml_client,\n",
332+
" compute_cluster=\"k8s-a100-compute\",\n",
333+
"    grpo_model_dir=grpo_registered_model.path,  # Output from GRPO RL provided as data asset created from earlier step.\n",
334+
" rlpp_model_dir=rlpp_registered_model.path, # Output from Reinforce_plus_plus RL provided as data asset created from earlier step.\n",
335+
" validation_dataset_path=test_data_path, # Path to test dataset\n",
336+
" run_config={\n",
337+
" \"num_nodes\": 1, # Number of nodes to be used for evaluation run.\n",
338+
" \"number_of_gpu_to_use\": 8, # Number of GPUs in a node to be used for evaluation run.\n",
339+
" \"base_path_1_label\": \"GRPO\", # Label to identify GRPO model outputs.\n",
340+
" \"base_path_2_label\": \"RLPP\", # Label to identify RLPP model outputs.\n",
341+
" \"explore_pattern_1\": \"global_step_{checkpoint}/actor/lora_adapter/\",\n",
342+
" \"explore_pattern_2\": \"global_step_{checkpoint}/actor/lora_adapter/\",\n",
343+
" \"checkpoint_values_1\": \"12\",\n",
344+
" \"checkpoint_values_2\": \"12\",\n",
345+
" \"use_lora_adapters_1\": True,\n",
346+
" \"use_lora_adapters_2\": True,\n",
347+
" \"evaluate_base_model\": True, # Set to True to evaluate base model along with RL finetuned models.\n",
348+
" \"hf_model_id\": \"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\", # Huggingface ID of the base model\n",
349+
" \"max_prompt_length\": 8196,\n",
350+
" \"max_response_length\": 1024,\n",
351+
" \"dtype\": \"bfloat16\",\n",
352+
" \"tensor_parallel_size\": 4,\n",
353+
" }, # Configuration parameters for evaluation run.\n",
355354
")"
356355
]
357356
},

0 commit comments

Comments
 (0)