Skip to content

Commit 7b8d735

Browse files
committed
mtmd: fix the wrong scale factor for get_rel_pos
1 parent 0f5587d commit 7b8d735

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

tools/mtmd/clip.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2529,19 +2529,22 @@ struct clip_graph {
25292529
); // [q_size, k_size]
25302530
k_coord = ggml_cont(ctx, ggml_repeat(ctx, k_coord, rel)); // [q_size, k_size]
25312531

2532+
float q_scale = std::max((float)k_size/q_size, 1.0f);
2533+
float k_scale = std::max((float)q_size/k_size, 1.0f);
2534+
25322535
// This branch is not triggered in DeepSeek-OCR; it is kept only for
25332536
// compatibility with the original implementation.
25342537
if (q_size != k_size) {
2535-
q_coord = ggml_scale_inplace(ctx, q_coord, std::max((float)k_size/q_size, 1.0f));
2536-
k_coord = ggml_scale_inplace(ctx, k_coord, std::max((float)q_size/k_size, 1.0f));
2538+
q_coord = ggml_scale_inplace(ctx, q_coord, q_scale);
2539+
k_coord = ggml_scale_inplace(ctx, k_coord, k_scale);
25372540
}
25382541

25392542
// -------------------------------------------------
25402543
// relative_coords = q - k + (k_size - 1) * k_scale // SAME as PyTorch; k_scale == 1 when no scaling
25412544
// -------------------------------------------------
25422545

25432546
rel = ggml_sub(ctx, q_coord, k_coord); // [q_size, k_size]
2544-
rel = ggml_scale_bias(ctx, rel, 1.0f, static_cast<float>(k_size) - 1.0f); // [q_size, k_size]
2547+
rel = ggml_scale_bias(ctx, rel, 1.0f, (k_size - 1.0f)*k_scale); // [q_size, k_size]
25452548
// Clamp to [0, L-1] range for valid indexing
25462549
rel = ggml_clamp(ctx, rel, 0.0f, static_cast<float>(rel_pos->ne[1] - 1));
25472550

0 commit comments

Comments
 (0)