@@ -18,9 +18,9 @@
 #include <vector>
 
 static void print_usage(int, char ** argv) {
-    LOG("\nexample usage:\n");
-    LOG("\n    %s -m model.gguf -c 8192 -b 2048 -ub 512\n", argv[0]);
-    LOG("\n");
+    LOG_TEE("\nexample usage:\n");
+    LOG_TEE("\n    %s -m model.gguf -c 8192 -b 2048 -ub 512\n", argv[0]);
+    LOG_TEE("\n");
 }
 
 int main(int argc, char ** argv) {
@@ -83,7 +83,7 @@ int main(int argc, char ** argv) {
 
         const int ret = llama_decode(ctx, batch_view);
         if (ret != 0) {
-            LOG("failed to decode the batch, n_batch = %d, ret = %d\n", n_batch, ret);
+            LOG_TEE("failed to decode the batch, n_batch = %d, ret = %d\n", n_batch, ret);
             return false;
         }
 
@@ -97,11 +97,11 @@ int main(int argc, char ** argv) {
     const unsigned int tg = params.n_ubatch / 4;
 
     if (!params.sweep_bench_output_jsonl) {
-        LOG("\n");
-        LOG("%s: n_kv_max = %d, n_batch = %d, n_ubatch = %d, flash_attn = %d, n_gpu_layers = %d, n_threads = %u, n_threads_batch = %u\n", __func__, n_kv_max, params.n_batch, params.n_ubatch, params.flash_attn, params.n_gpu_layers, ctx_params.n_threads, ctx_params.n_threads_batch);
-        LOG("\n");
-        LOG("|%6s | %6s | %6s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s");
-        LOG("|%6s-|-%6s-|-%6s-|-%8s-|-%8s-|-%8s-|-%8s-|\n", "------", "------", "------", "--------", "--------", "--------", "--------");
+        LOG_TEE("\n");
+        LOG_TEE("%s: n_kv_max = %d, n_batch = %d, n_ubatch = %d, flash_attn = %d, n_gpu_layers = %d, n_threads = %u, n_threads_batch = %u\n", __func__, n_kv_max, params.n_batch, params.n_ubatch, params.flash_attn, params.n_gpu_layers, ctx_params.n_threads, ctx_params.n_threads_batch);
+        LOG_TEE("\n");
+        LOG_TEE("|%6s | %6s | %6s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s");
+        LOG_TEE("|%6s-|-%6s-|-%6s-|-%8s-|-%8s-|-%8s-|-%8s-|\n", "------", "------", "------", "--------", "--------", "--------", "--------");
     }
 
     llama_batch batch = llama_batch_init(n_kv_max, 0, 1);
@@ -111,7 +111,7 @@ int main(int argc, char ** argv) {
         llama_batch_add(batch, bos, 0, { 0 }, false);
 
         if (!decode_helper(ctx, batch, ctx_params.n_batch)) {
-            LOG("%s: llama_decode() failed\n", __func__);
+            LOG_TEE("%s: llama_decode() failed\n", __func__);
             return 1;
         }
     }
@@ -131,7 +131,7 @@ int main(int argc, char ** argv) {
             llama_batch_add(batch, std::rand() % n_vocab, n_kv + i, { 0 }, true);
 
             if (!decode_helper(ctx, batch, ctx_params.n_batch)) {
-                LOG("%s: llama_decode() failed\n", __func__);
+                LOG_TEE("%s: llama_decode() failed\n", __func__);
                 return 1;
             }
         }
@@ -153,7 +153,7 @@ int main(int argc, char ** argv) {
         const auto t_pp_start = ggml_time_us();
 
         if (!decode_helper(ctx, batch, ctx_params.n_batch)) {
-            LOG("%s: llama_decode() failed\n", __func__);
+            LOG_TEE("%s: llama_decode() failed\n", __func__);
             return 1;
         }
 
@@ -167,14 +167,14 @@ int main(int argc, char ** argv) {
         const float speed_tg = tg / t_tg;
 
         if (params.sweep_bench_output_jsonl) {
-            LOG(
+            LOG_TEE(
                 "{\"n_kv_max\": %d, \"n_batch\": %d, \"n_ubatch\": %d, \"flash_attn\": %d, \"n_gpu_layers\": %d, \"n_threads\": %u, \"n_threads_batch\": %u, "
                 "\"pp\": %d, \"tg\": %d, \"n_kv\": %d, \"t_pp\": %f, \"speed_pp\": %f, \"t_tg\": %f, \"speed_tg\": %f }\n",
                 n_kv_max, params.n_batch, params.n_ubatch, params.flash_attn, params.n_gpu_layers, ctx_params.n_threads, ctx_params.n_threads_batch,
                 pp, tg, n_kv, t_pp, speed_pp, t_tg, speed_tg
             );
         } else {
-            LOG("|%6d | %6d | %6d | %8.3f | %8.2f | %8.3f | %8.2f |\n", pp, tg, n_kv, t_pp, speed_pp, t_tg, speed_tg);
+            LOG_TEE("|%6d | %6d | %6d | %8.3f | %8.2f | %8.3f | %8.2f |\n", pp, tg, n_kv, t_pp, speed_pp, t_tg, speed_tg);
         }
     }
 
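For context, the diff above swaps every LOG call for LOG_TEE. In llama.cpp's common logging code, LOG_TEE mirrors each message to the console in addition to the active log file, which keeps the benchmark table visible on screen while a log file is still being written. Below is a minimal sketch of that tee idea; the macro name LOG_TEE_SKETCH and the g_logfile global are hypothetical stand-ins, not the actual LOG_TEE implementation from common/log.h.

#include <cstdio>

// Hypothetical stand-in for the log file handle the logging code manages.
static FILE * g_logfile = nullptr;

// Tee-style logging: print to stderr and, if a log file is open, mirror the
// same message into it. Only a sketch of the concept behind LOG_TEE.
#define LOG_TEE_SKETCH(...)                  \
    do {                                     \
        fprintf(stderr, __VA_ARGS__);        \
        if (g_logfile != nullptr) {          \
            fprintf(g_logfile, __VA_ARGS__); \
            fflush(g_logfile);               \
        }                                    \
    } while (0)

int main() {
    // With g_logfile unset, this behaves like plain fprintf(stderr, ...);
    // once a file is opened into g_logfile, output goes to both places.
    LOG_TEE_SKETCH("pp = %d, tg = %d\n", 512, 128);
    return 0;
}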