Commit e68b55b

Remove unused use_fp16 argument to idx_compute_scores_tile

1 parent a5ae544
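The change is mechanical: the bool use_fp16 parameter was accepted but never read inside idx_compute_scores_tile, so the declaration, the definition, and every call site simply drop the trailing argument. As a before/after sketch, taken verbatim from the src/llama-sparse-topk.cpp hunk below:

    // before: the unused flag is threaded through every call
    scores_tc = llama::sparse_attn_indexer::idx_compute_scores_tile(ctx, q3d, k_indexer_f16, weights, k_scale_2d, D, H, Tc, kv_end, t0, use_fp16);
    // after: the argument list now ends at t0
    scores_tc = llama::sparse_attn_indexer::idx_compute_scores_tile(ctx, q3d, k_indexer_f16, weights, k_scale_2d, D, H, Tc, kv_end, t0);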

6 files changed: 7 additions, 11 deletions

src/llama-sparse-indexer.cpp

Lines changed: 1 addition & 3 deletions

@@ -51,8 +51,7 @@ ggml_tensor * sparse_attn_indexer::idx_compute_scores_tile(
         ggml_tensor * k_scale_2d,
         int64_t D, int64_t H,
         int64_t Tc, int64_t kv_end,
-        int64_t t0,
-        bool use_fp16) {
+        int64_t t0) {
     const char * __prof_env = getenv("LLAMA_SPARSE_PROF");
     bool prof = (__prof_env && atoi(__prof_env) != 0);
     double t0_us = 0.0;
@@ -184,7 +183,6 @@ ggml_tensor * sparse_attn_indexer::idx_compute_scores_tile(
                 float v = out[(size_t)i + (size_t)kv * (size_t)tc];
                 fprintf(stderr, " C[%d,%d]= % .6f", i, tc, v);
             }
-            fprintf(stderr, "");
         }
     }
src/llama-sparse-indexer.h

Lines changed: 1 addition & 2 deletions

@@ -32,8 +32,7 @@ struct sparse_attn_indexer {
         ggml_tensor * k_scale_2d,
         int64_t D, int64_t H,
         int64_t Tc, int64_t kv_end,
-        int64_t t0,
-        bool use_fp16);
+        int64_t t0);

     // Build KV-aware top-k token indices using the Lightning Indexer tensors.
     // If mctx is nullptr, uses freshly computed K_indexer directly without cache writes.
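Pieced together from the hunk above and the call sites in this commit, the post-change declaration looks roughly as follows. Only the trailing parameters appear in the hunk, so the leading names (ctx, q3d, k_indexer_f16, weights) are inferred from the src/llama-sparse-topk.cpp calls and are an assumption, not the literal header text; likewise, static is inferred from the tests calling through the class name:

    // sketch only: leading parameters inferred from call sites, not copied from the header
    static ggml_tensor * idx_compute_scores_tile(
            ggml_context * ctx,            // inferred: graph-build context
            ggml_tensor  * q3d,            // inferred: indexer queries
            ggml_tensor  * k_indexer_f16,  // inferred: indexer keys
            ggml_tensor  * weights,        // inferred: per-head indexer weights
            ggml_tensor  * k_scale_2d,
            int64_t D, int64_t H,
            int64_t Tc, int64_t kv_end,
            int64_t t0);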

src/llama-sparse-topk.cpp

Lines changed: 2 additions & 2 deletions

@@ -505,7 +505,7 @@ ggml_tensor * sparse_attn_topk::select_topk_tokens_indexer_kvaware(
     }

     if (dbg && t0 == 0) {
-        ggml_tensor * ref_scores = llama::sparse_attn_indexer::idx_compute_scores_tile(ctx, q3d, k_indexer_f16, weights, k_scale_2d, D, H, Tc, kv_end, t0, use_fp16);
+        ggml_tensor * ref_scores = llama::sparse_attn_indexer::idx_compute_scores_tile(ctx, q3d, k_indexer_f16, weights, k_scale_2d, D, H, Tc, kv_end, t0);
         ggml_tensor * ref_head = ggml_view_2d(ctx, ref_scores, std::min<int64_t>(kv_end, (int64_t)8), std::min<int64_t>(Tc, (int64_t)4), ref_scores->nb[1], 0);
         cb(ref_head, "idxkv_scores_ref_head", -1);
         if (gf) { ggml_set_output(ref_head); ggml_build_forward_expand(gf, ref_head); }
@@ -542,7 +542,7 @@
         }

     } else {
-        scores_tc = llama::sparse_attn_indexer::idx_compute_scores_tile(ctx, q3d, k_indexer_f16, weights, k_scale_2d, D, H, Tc, kv_end, t0, use_fp16);
+        scores_tc = llama::sparse_attn_indexer::idx_compute_scores_tile(ctx, q3d, k_indexer_f16, weights, k_scale_2d, D, H, Tc, kv_end, t0);
     }
 }
tests/test-indexer-fused-op-cuda.cpp

Lines changed: 1 addition & 2 deletions

@@ -293,8 +293,7 @@ int main() {
         H_ref,
         Tc_ref,
         kv_ref,
-        /*t0=*/0,
-        /*use_fp16=*/false);
+        /*t0=*/0);

     ggml_cgraph * gf_ref = ggml_new_graph(ctx_ref);
     ggml_build_forward_expand(gf_ref, scores_ref);

tests/test-indexer-scores-tile.cpp

Lines changed: 1 addition & 1 deletion

@@ -85,7 +85,7 @@ int main() {
     std::memcpy(ks1d->data, KS.data(), ggml_nbytes(ks1d));

     ggml_tensor * scores = sparse_attn_indexer::idx_compute_scores_tile(
-        ctx, q3d, a_k, w2d, ks2d, D, H, Tc, kv, 0, /*use_fp16=*/false);
+        ctx, q3d, a_k, w2d, ks2d, D, H, Tc, kv, 0);

     const int iters = 10;
tests/test-indexer-triplet-vs-fused.cpp

Lines changed: 1 addition & 1 deletion

@@ -139,7 +139,7 @@ int main() {
     ggml_tensor * scores_tc = sparse_attn_indexer::idx_compute_scores_tile(
         cpu_ctx.ctx, q3d, a_k, w2d, k_scale_2d,
         D_index, H_index, T, N_kv,
-        /*t0=*/0, /*use_fp16=*/false);
+        /*t0=*/0);

     ggml_cgraph * gf_cpu = ggml_new_graph(cpu_ctx.ctx);
     ggml_build_forward_expand(gf_cpu, scores_tc);
