Commit 1bde707

sampling : remove redundant calls to ggml_build_forward_expand
1 parent fce571e commit 1bde707
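
Each sampler's backend-apply function was calling ggml_build_forward_expand on its own intermediate result. ggml_build_forward_expand recursively visits a tensor's parents and adds every node it depends on to the graph, so a single expand of the final output tensor (presumably performed by the code that assembles the full sampling graph) already pulls in the per-sampler nodes; the per-sampler calls only revisit nodes that are reachable anyway. A minimal sketch of that behavior, with ggml_scale/ggml_soft_max standing in for the real sampler ops:

    #include "ggml.h"

    // Sketch only, not llama.cpp code: two chained ops stand in for
    // successive sampler stages operating on the logits tensor.
    static void build_sampling_chain(struct ggml_context * ctx,
                                     struct ggml_cgraph  * gf,
                                     struct ggml_tensor  * logits) {
        struct ggml_tensor * scaled = ggml_scale(ctx, logits, 0.5f);
        struct ggml_tensor * probs  = ggml_soft_max(ctx, scaled);

        // One expand of the final node adds `scaled` and `probs` (and their
        // inputs) to gf; expanding `scaled` separately would be redundant.
        ggml_build_forward_expand(gf, probs);
    }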

File tree

1 file changed: +6 additions, -10 deletions

src/llama-sampling.cpp

Lines changed: 6 additions & 10 deletions
@@ -1078,7 +1078,8 @@ static void llama_sampler_top_k_backend_apply(
     ggml_set_name(top_k_rows, "top_k_rows");
 
     data->logits = ggml_reshape_1d(ctx, top_k_rows, ctx_data->k);
-    ggml_build_forward_expand(gf, data->logits);
+
+    GGML_UNUSED(gf);
 }
 
 static struct llama_sampler_i llama_sampler_top_k_i = {
@@ -1264,10 +1265,9 @@ static void llama_sampler_top_p_backend_apply(
     ggml_set_name(data->logits, "top_p_logits");
 
     ggml_set_output(data->candidates);
-    ggml_build_forward_expand(gf, data->candidates);
-
     ggml_set_output(data->logits);
-    ggml_build_forward_expand(gf, data->logits);
+
+    GGML_UNUSED(gf);
 }
 
 static struct llama_sampler_i llama_sampler_top_p_i = {
@@ -1421,7 +1421,7 @@ static void llama_sampler_min_p_backend_apply(
     data->logits = ggml_add(ctx, data->logits, min_p_bias);
     ggml_set_name(data->logits, "min_p_logits");
 
-    ggml_build_forward_expand(gf, data->logits);
+    GGML_UNUSED(gf);
 }
 
 static struct llama_sampler_i llama_sampler_min_p_i = {
@@ -1602,7 +1602,6 @@ static void llama_sampler_backend_temp_sampling(
     struct ggml_tensor * logit = ggml_reshape_2d(ctx, data->logits, 1, data->logits->ne[0]);
 
     data->logits = ggml_get_rows(ctx, logit, max_idx);
-    ggml_build_forward_expand(gf, data->logits);
 
     return;
 }
@@ -1614,7 +1613,7 @@ static void llama_sampler_backend_temp_sampling(
     data->logits = ggml_cont(ctx, scaled);
     ggml_set_name(data->logits, "temp_scaled_logits");
 
-    ggml_build_forward_expand(gf, data->logits);
+    GGML_UNUSED(gf);
 }
 
 static void llama_sampler_temp_backend_apply(
@@ -1807,7 +1806,6 @@ static void llama_sampler_temp_ext_backend_apply(
     ggml_set_name(scaled_logits, "temp_ext_scaled_logits");
 
     data->logits = scaled_logits;
-    ggml_build_forward_expand(gf, data->logits);
 }
 
 static struct llama_sampler_i llama_sampler_temp_ext_i = {
@@ -3080,8 +3078,6 @@ static void llama_sampler_logit_bias_backend_apply(
     // Add the sparse logit logit_bias to the logits
     struct ggml_tensor * logit_biased = ggml_add_inplace(ctx, data->logits, sctx->inp_logit_bias);
     data->logits = logit_biased;
-
-    ggml_build_forward_expand(gf, logit_biased);
 }
 
 static void llama_sampler_logit_bias_backend_set_input(struct llama_sampler * smpl) {
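
The gf parameter stays in each function's signature, presumably because the backend-apply callbacks share a common interface, so the new GGML_UNUSED(gf); lines keep unused-parameter warnings quiet. GGML_UNUSED is the usual cast-to-void idiom; ggml.h defines it roughly as:

    // Paraphrased from ggml.h: evaluate and discard the argument to mark it
    // as intentionally unused without changing behavior.
    #define GGML_UNUSED(x) (void)(x)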
