@@ -1078,7 +1078,8 @@ static void llama_sampler_top_k_backend_apply(
10781078 ggml_set_name (top_k_rows, " top_k_rows" );
10791079
10801080 data->logits = ggml_reshape_1d (ctx, top_k_rows, ctx_data->k );
1081- ggml_build_forward_expand (gf, data->logits );
1081+
1082+ GGML_UNUSED (gf);
10821083}
10831084
10841085static struct llama_sampler_i llama_sampler_top_k_i = {
@@ -1264,10 +1265,9 @@ static void llama_sampler_top_p_backend_apply(
12641265 ggml_set_name (data->logits , " top_p_logits" );
12651266
12661267 ggml_set_output (data->candidates );
1267- ggml_build_forward_expand (gf, data->candidates );
1268-
12691268 ggml_set_output (data->logits );
1270- ggml_build_forward_expand (gf, data->logits );
1269+
1270+ GGML_UNUSED (gf);
12711271}
12721272
12731273static struct llama_sampler_i llama_sampler_top_p_i = {
@@ -1421,7 +1421,7 @@ static void llama_sampler_min_p_backend_apply(
14211421 data->logits = ggml_add (ctx, data->logits , min_p_bias);
14221422 ggml_set_name (data->logits , " min_p_logits" );
14231423
1424- ggml_build_forward_expand (gf, data-> logits );
1424+ GGML_UNUSED (gf);
14251425}
14261426
14271427static struct llama_sampler_i llama_sampler_min_p_i = {
@@ -1602,7 +1602,6 @@ static void llama_sampler_backend_temp_sampling(
16021602 struct ggml_tensor * logit = ggml_reshape_2d (ctx, data->logits , 1 , data->logits ->ne [0 ]);
16031603
16041604 data->logits = ggml_get_rows (ctx, logit, max_idx);
1605- ggml_build_forward_expand (gf, data->logits );
16061605
16071606 return ;
16081607 }
@@ -1614,7 +1613,7 @@ static void llama_sampler_backend_temp_sampling(
16141613 data->logits = ggml_cont (ctx, scaled);
16151614 ggml_set_name (data->logits , " temp_scaled_logits" );
16161615
1617- ggml_build_forward_expand (gf, data-> logits );
1616+ GGML_UNUSED (gf);
16181617}
16191618
16201619static void llama_sampler_temp_backend_apply (
@@ -1807,7 +1806,6 @@ static void llama_sampler_temp_ext_backend_apply(
18071806 ggml_set_name (scaled_logits, " temp_ext_scaled_logits" );
18081807
18091808 data->logits = scaled_logits;
1810- ggml_build_forward_expand (gf, data->logits );
18111809}
18121810
18131811static struct llama_sampler_i llama_sampler_temp_ext_i = {
@@ -3080,8 +3078,6 @@ static void llama_sampler_logit_bias_backend_apply(
30803078 // Add the sparse logit logit_bias to the logits
30813079 struct ggml_tensor * logit_biased = ggml_add_inplace (ctx, data->logits , sctx->inp_logit_bias );
30823080 data->logits = logit_biased;
3083-
3084- ggml_build_forward_expand (gf, logit_biased);
30853081}
30863082
30873083static void llama_sampler_logit_bias_backend_set_input (struct llama_sampler * smpl) {
0 commit comments