
Commit 66c3743

Merge pull request #13 from tsisw/FIR-736
@FIR-736 - llama.cpp: Disable all logs except token generation log
2 parents 2aeae8f + 52e4a58

9 files changed: +31 −4 lines changed

common/log.h

Lines changed: 9 additions & 0 deletions

@@ -90,11 +90,20 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps)
 #define LOG(...)             LOG_TMPL(GGML_LOG_LEVEL_NONE, 0,         __VA_ARGS__)
 #define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
 
+#if ENABLE_LOG
 #define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  0,                 __VA_ARGS__)
 #define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  0,                 __VA_ARGS__)
 #define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0,                 __VA_ARGS__)
 #define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__)
 #define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT,  0,                 __VA_ARGS__)
+#else
+#define LOG_INF(...)
+#define LOG_WRN(...)
+#define LOG_ERR(...)
+#define LOG_DBG(...)
+#define LOG_CNT(...)
+#endif
+#define LOG_TSAVORITE(...) LOG_TMPL(GGML_LOG_LEVEL_TSAVORITE, 0, __VA_ARGS__)
 
 #define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO, verbosity, __VA_ARGS__)
 #define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN, verbosity, __VA_ARGS__)
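The gating above happens at compile time: when ENABLE_LOG is 0 or undefined, every LOG_INF/LOG_WRN/LOG_ERR/LOG_DBG/LOG_CNT call site expands to nothing, so the format string and its arguments are never evaluated (and any side effects in those arguments vanish with them). A minimal standalone sketch of the same pattern, assuming a g++ build; the fprintf body merely stands in for LOG_TMPL:

    // build with:  g++ -DENABLE_LOG=1 gate_demo.cpp   -> log line is printed
    //        or:   g++ gate_demo.cpp                  -> log call compiles away
    #include <cstdio>

    #if ENABLE_LOG
    #define LOG_INF(...) fprintf(stderr, __VA_ARGS__)
    #else
    #define LOG_INF(...) // expands to nothing: arguments are not evaluated
    #endif

    int main() {
        LOG_INF("loading model %s\n", "model.gguf"); // emitted only when ENABLE_LOG=1
        return 0;
    }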

ggml/include/ggml.h

Lines changed: 1 addition & 0 deletions

@@ -554,6 +554,7 @@ extern "C" {
         GGML_LOG_LEVEL_WARN  = 3,
         GGML_LOG_LEVEL_ERROR = 4,
         GGML_LOG_LEVEL_CONT  = 5, // continue previous log
+        GGML_LOG_LEVEL_TSAVORITE = 42,
     };
 
     // this tensor...

ggml/src/ggml-impl.h

Lines changed: 1 addition & 0 deletions

@@ -85,6 +85,7 @@ GGML_API void ggml_log_callback_default(enum ggml_log_level level, const char *
 #define GGML_LOG_ERROR(...)     ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
 #define GGML_LOG_DEBUG(...)     ggml_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
 #define GGML_LOG_CONT(...)      ggml_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
+#define GGML_LOG_TSAVORITE(...) ggml_log_internal(GGML_LOG_LEVEL_TSAVORITE , __VA_ARGS__)
 
 #define GGML_DEBUG 0

ggml/src/ggml-tsavorite/ggml-tsavorite.cpp

Lines changed: 2 additions & 3 deletions

@@ -504,7 +504,6 @@ static void *ggml_tsavorite_host_malloc(size_t n) {
   GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
 
   GGML_TSAVORITE_LOG_INFO("\n Allocating memory from tsi_alloc with size %ld \n", n);
-  printf("\n ANoop Allocating memory from tsi_alloc with size %ld \n", n);
   data = tsi_alloc(n);
   GGML_TSAVORITE_LOG_CONT("\n Allocating memory from tsi_alloc with size %ld starting memory %p\n",
                           n, data);
@@ -1800,7 +1799,6 @@ static bool ggml_backend_tsavorite_device_supports_buft(ggml_backend_dev_t dev,
 // ggml_backend_sched_backend_id_from_cur -> ggml_backend_offload_op ->
 static bool ggml_backend_tsavorite_device_offload_op(ggml_backend_dev_t dev,
                                                      const struct ggml_tensor *op) {
-  // printf("\n ANoop Calling %s \n ", __func__);
   if (op->type != GGML_TYPE_F32)
     return false;
   switch (op->op) {
@@ -1894,8 +1892,9 @@ static struct ggml_backend_reg_i ggml_backend_tsavorite_reg_i = {
     /* .get_proc_address = */ NULL,
 };
 
+
 ggml_backend_reg_t ggml_backend_tsavorite_reg(void) {
-  ggml_tsavorite_log_type_val = GGML_TSAVORITE_LOG_ERROR;
+  ggml_tsavorite_log_type_val = GGML_TSAVORITE_LOG_NONE;
   ggml_tsavorite_kernel_mode_flag = GGML_TSAVORITE_KERNEL_MODE_MLIR;
   GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
   g_ggml_backend_tsavorite_reg.iface = ggml_backend_tsavorite_reg_i;

ggml/src/ggml.c

Lines changed: 1 addition & 0 deletions

@@ -249,6 +249,7 @@ static void ggml_log_internal_v(enum ggml_log_level level, const char * format,
 void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
     va_list args;
     va_start(args, format);
+    if (level == GGML_LOG_LEVEL_TSAVORITE)
     ggml_log_internal_v(level, format, args);
     va_end(args);
 }
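This one-line guard is the heart of the change on the ggml side: ggml_log_internal is the funnel behind GGML_LOG_INFO, GGML_LOG_ERROR, and the other GGML_LOG_* macros, so after the guard only GGML_LOG_LEVEL_TSAVORITE messages ever reach the log callback; every other level is dropped at the source. A self-contained sketch of the filtering behavior (enum values mirror ggml.h; vfprintf stands in for ggml_log_internal_v):

    #include <cstdarg>
    #include <cstdio>

    enum log_level { LVL_INFO = 2, LVL_TSAVORITE = 42 }; // mirrors ggml_log_level

    static void log_internal(log_level level, const char * format, ...) {
        va_list args;
        va_start(args, format);
        if (level == LVL_TSAVORITE)          // same guard as the diff above
            vfprintf(stderr, format, args);
        va_end(args);                        // args must be cleaned up either way
    }

    int main() {
        log_internal(LVL_INFO,      "suppressed: %d\n", 1); // never printed
        log_internal(LVL_TSAVORITE, "printed: %d\n",    2); // passes the guard
        return 0;
    }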

src/llama-context.cpp

Lines changed: 7 additions & 1 deletion

@@ -2615,13 +2615,19 @@ void llama_perf_context_print(const llama_context * ctx) {
     const auto data = llama_perf_context(ctx);
 
     const double t_end_ms = 1e-3 * ggml_time_us();
-
     LLAMA_LOG_INFO("%s: load time = %10.2f ms\n", __func__, data.t_load_ms);
     LLAMA_LOG_INFO("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
            __func__, data.t_p_eval_ms, data.n_p_eval, data.t_p_eval_ms / data.n_p_eval, 1e3 / data.t_p_eval_ms * data.n_p_eval);
     LLAMA_LOG_INFO("%s: eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
            __func__, data.t_eval_ms, data.n_eval, data.t_eval_ms / data.n_eval, 1e3 / data.t_eval_ms * data.n_eval);
     LLAMA_LOG_INFO("%s: total time = %10.2f ms / %5d tokens\n", __func__, (t_end_ms - data.t_start_ms), (data.n_p_eval + data.n_eval));
+
+    LLAMA_LOG_TSAVORITE("%s: load time = %10.2f ms\n", __func__, data.t_load_ms);
+    LLAMA_LOG_TSAVORITE("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
+           __func__, data.t_p_eval_ms, data.n_p_eval, data.t_p_eval_ms / data.n_p_eval, 1e3 / data.t_p_eval_ms * data.n_p_eval);
+    LLAMA_LOG_TSAVORITE("%s: eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
+           __func__, data.t_eval_ms, data.n_eval, data.t_eval_ms / data.n_eval, 1e3 / data.t_eval_ms * data.n_eval);
+    LLAMA_LOG_TSAVORITE("%s: total time = %10.2f ms / %5d tokens\n", __func__, (t_end_ms - data.t_start_ms), (data.n_p_eval + data.n_eval));
 }
 
 void llama_perf_context_reset(llama_context * ctx) {
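Each performance line is now emitted twice on purpose: the LLAMA_LOG_INFO copy preserves stock behavior for builds that install an ordinary log callback, while the LLAMA_LOG_TSAVORITE copy is the one that survives the filtering callback installed in tools/main/main.cpp (see below). Note that the llama-side LLAMA_LOG_* macros route through llama_log_internal, not through the ggml filter above, so here it is the callback that does the filtering. A compact sketch of that routing, with the callback inlined for illustration:

    #include <cstdio>

    enum log_level { LVL_INFO = 2, LVL_TSAVORITE = 42 }; // mirrors ggml_log_level

    // stands in for the my_logger callback installed via llama_log_set()
    static void filter_cb(log_level level, const char * text) {
        if (level == LVL_TSAVORITE) {
            fputs(text, stderr);
        }
    }

    int main() {
        // the two copies of one (illustrative) perf line, at the two levels used above
        filter_cb(LVL_INFO,      "total time = 123.45 ms\n"); // dropped by the callback
        filter_cb(LVL_TSAVORITE, "total time = 123.45 ms\n"); // the copy the user sees
        return 0;
    }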

src/llama-impl.h

Lines changed: 1 addition & 0 deletions

@@ -29,6 +29,7 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
 #define LLAMA_LOG_ERROR(...)     llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
 #define LLAMA_LOG_DEBUG(...)     llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
 #define LLAMA_LOG_CONT(...)      llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
+#define LLAMA_LOG_TSAVORITE(...) llama_log_internal(GGML_LOG_LEVEL_TSAVORITE, __VA_ARGS__)
 
 //
 // helpers

src/llama-sampling.cpp

Lines changed: 2 additions & 0 deletions

@@ -2562,6 +2562,8 @@ void llama_perf_sampler_print(const struct llama_sampler * chain) {
 
     LLAMA_LOG_INFO("%s: sampling time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
            __func__, data.t_sample_ms, data.n_sample, data.t_sample_ms / data.n_sample, 1e3 / data.t_sample_ms * data.n_sample);
+    LLAMA_LOG_TSAVORITE("\n\n%s: sampling time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)",
+           __func__, data.t_sample_ms, data.n_sample, data.t_sample_ms / data.n_sample, 1e3 / data.t_sample_ms * data.n_sample);
 }
 
 void llama_perf_sampler_reset(struct llama_sampler * chain) {

tools/main/main.cpp

Lines changed: 7 additions & 0 deletions

@@ -41,6 +41,12 @@ static std::vector<llama_token> * g_output_tokens;
 static bool is_interacting = false;
 static bool need_insert_eot = false;
 
+static void my_logger(ggml_log_level level, const char *text, void *user_data) {
+    if (level == GGML_LOG_LEVEL_TSAVORITE) {
+        fprintf(stderr, "%s", text); // only pass through Tsavorite token-generation logs
+    }
+}
+
 static void print_usage(int argc, char ** argv) {
     (void) argc;
 
@@ -120,6 +126,7 @@ int main(int argc, char ** argv) {
         LOG_WRN("%s: warning: scaling RoPE frequency by %g.\n", __func__, params.rope_freq_scale);
     }
 
+    llama_log_set(my_logger, nullptr);
     LOG_INF("%s: llama backend init\n", __func__);
 
     llama_backend_init();
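my_logger inverts the usual convention: instead of muting noisy levels, it mutes everything except the Tsavorite channel, which now carries only the token-generation and perf output. If full verbosity were needed again, a pass-through callback would restore the llama-side logs; ggml-internal messages, however, are already discarded inside ggml_log_internal by this commit and would not come back. A hypothetical sketch (verbose_logger is not part of this commit):

    #include "llama.h" // for llama_log_set and ggml_log_level
    #include <cstdio>

    static void verbose_logger(ggml_log_level level, const char * text, void * user_data) {
        (void) level;
        (void) user_data;
        fputs(text, stderr); // forward every message that still reaches the callback
    }

    int main(int argc, char ** argv) {
        (void) argc; (void) argv;
        llama_log_set(verbose_logger, nullptr); // install before llama_backend_init()
        llama_backend_init();
        // ... load model, generate, etc.
        llama_backend_free();
        return 0;
    }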
