
Commit c41f2b1

Merge pull request #43 from bigcode-project/tokens-per-second-gpu
add token/s/gpu to wandb
2 parents: e0b644b + 36d0435

File tree: 1 file changed (+5 lines, -0 lines)


megatron/training.py

Lines changed: 5 additions & 0 deletions
@@ -619,6 +619,10 @@ def add_to_logging(name):
     if iteration % args.log_interval == 0:
         elapsed_time = timers('interval-time').elapsed()
         elapsed_time_per_iteration = elapsed_time / total_iterations
+
+        num_gpus = args.data_parallel_size * args.tensor_model_parallel_size * args.pipeline_model_parallel_size
+        tokens_per_sec_per_gpu = (args.seq_length * batch_size) / num_gpus / elapsed_time_per_iteration
+
         tflops = get_tflops(batch_size, elapsed_time_per_iteration)
         if writer:
             if args.log_timers_to_tensorboard:
@@ -672,6 +676,7 @@ def add_to_logging(name):
             'loss-scale': loss_scale,
             'grad-norm': grad_norm,
             'tflops': tflops,
+            'tokens-per-second-per-gpu': tokens_per_sec_per_gpu,
             **loss_dict
         }
         wandb.log(metrics, step=iteration)
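For reference, a minimal standalone sketch of the metric this commit logs: tokens per second per GPU is the number of tokens processed each iteration (sequence length × global batch size) divided by the total GPU count (the product of the data-, tensor-, and pipeline-parallel degrees) and by the iteration time. The concrete numbers below are illustrative assumptions for the example, not values taken from the repository.

# Illustrative sketch of the tokens/s/GPU metric added in this commit.
# The numeric values are example assumptions, not repository defaults.
seq_length = 2048                  # tokens per sequence
batch_size = 192                   # global batch size (sequences per iteration)
data_parallel_size = 8             # DP degree
tensor_model_parallel_size = 4     # TP degree
pipeline_model_parallel_size = 2   # PP degree
elapsed_time_per_iteration = 3.5   # seconds per training iteration

# Total GPUs is the product of the three parallelism degrees.
num_gpus = data_parallel_size * tensor_model_parallel_size * pipeline_model_parallel_size

# Tokens processed per iteration, normalized by GPU count and iteration time.
tokens_per_sec_per_gpu = (seq_length * batch_size) / num_gpus / elapsed_time_per_iteration

print(f"{num_gpus} GPUs, {tokens_per_sec_per_gpu:.0f} tokens/s/GPU")  # 64 GPUs, 1755 tokens/s/GPU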
