1 file changed: +5 -0 lines changed
Columns: original file line number | diff line number | diff line change
@@ -619,6 +619,10 @@ def add_to_logging(name):
619619 if iteration % args .log_interval == 0 :
620620 elapsed_time = timers ('interval-time' ).elapsed ()
621621 elapsed_time_per_iteration = elapsed_time / total_iterations
622+
623+ num_gpus = args .data_parallel_size * args .tensor_model_parallel_size * args .pipeline_model_parallel_size
624+ tokens_per_sec_per_gpu = (args .seq_length * batch_size ) / num_gpus / elapsed_time_per_iteration
625+
622626 tflops = get_tflops (batch_size , elapsed_time_per_iteration )
623627 if writer :
624628 if args .log_timers_to_tensorboard :
@@ -672,6 +676,7 @@ def add_to_logging(name):
672676 'loss-scale' : loss_scale ,
673677 'grad-norm' : grad_norm ,
674678 'tflops' : tflops ,
679+ 'tokens-per-second-per-gpu' : tokens_per_sec_per_gpu ,
675680 ** loss_dict
676681 }
677682 wandb .log (metrics , step = iteration )
You can’t perform that action at this time.
0 commit comments