Skip to content

Commit 9029297

Browse files
authored
[Feat]: Adding example for grpo pipeline job trigger (#3663)
* [Feat]: Adding example for grpo pipeline job trigger
* [update]: format with black
1 parent 87ddd59 commit 9029297

File tree

2 files changed

+486
-0
lines changed

2 files changed

+486
-0
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
{
2+
"deepspeed_multinode_launcher": "standard",
3+
"offload_optimizer_device": "cpu",
4+
"offload_param_device": "cpu",
5+
"zero3_init_flag": true,
6+
"zero3_save_16bit_model": true,
7+
"zero_stage": 3,
8+
"overlap_comm": true,
9+
"sub_group_size": 1000000,
10+
"stage3_max_live_parameters": 1000,
11+
"stage3_max_reuse_distance": 1000000,
12+
"stage3_gather_16bit_weights_on_model_save": true,
13+
"train_batch_size": "auto",
14+
"train_micro_batch_size_per_gpu": "auto",
15+
"gradient_accumulation_steps": "auto",
16+
"reduce_bucket_size": 5000000,
17+
"stage3_prefetch_bucket_size": 5000000,
18+
"stage3_param_persistence_threshold": 5000000,
19+
"memory_efficient_linear": true,
20+
"contiguous_gradients": true,
21+
"zero_optimization": {
22+
"stage": 3,
23+
"cpu_offload": true,
24+
"contiguous_gradients": true,
25+
"sub_group_size": 1000000,
26+
"stage3_prefetch_bucket_size": 5000000,
27+
"stage3_param_persistence_threshold": 5000000,
28+
"stage3_max_live_parameters": 1000,
29+
"stage3_max_reuse_distance": 1000000,
30+
"stage3_gather_16bit_weights_on_model_save": true
31+
},
32+
"optimizer": {
33+
"type": "Adam",
34+
"params": {
35+
"lr": "auto",
36+
"betas": [
37+
0.9,
38+
0.999
39+
],
40+
"eps": 1e-8,
41+
"weight_decay": 0.0
42+
}
43+
}
44+
}

0 commit comments

Comments (0)