@@ -2529,19 +2529,22 @@ struct clip_graph {
25292529 ); // [q_size, k_size]
25302530 k_coord = ggml_cont (ctx, ggml_repeat (ctx, k_coord, rel)); // [q_size, k_size]
25312531
2532+ float q_scale = std::max ((float )k_size/q_size, 1 .0f );
2533+ float k_scale = std::max ((float )q_size/k_size, 1 .0f );
2534+
25322535 // This wouldn't be triggered in DeepSeek-OCR. Just for compatibility with
25332536 // the original implementation.
25342537 if (q_size != k_size) {
2535- q_coord = ggml_scale_inplace (ctx, q_coord, std::max (( float )k_size/q_size, 1 . 0f ) );
2536- k_coord = ggml_scale_inplace (ctx, k_coord, std::max (( float )q_size/k_size, 1 . 0f ) );
2538+ q_coord = ggml_scale_inplace (ctx, q_coord, q_scale );
2539+ k_coord = ggml_scale_inplace (ctx, k_coord, k_scale );
25372540 }
25382541
25392542 // -------------------------------------------------
25402543 // relative_coords = q - k + (k_size - 1) // SAME as PyTorch when no scaling
25412544 // -------------------------------------------------
25422545
25432546 rel = ggml_sub (ctx, q_coord, k_coord); // [q_size, k_size]
2544- rel = ggml_scale_bias (ctx, rel, 1 .0f , static_cast < float > (k_size) - 1 .0f ); // [q_size, k_size]
2547+ rel = ggml_scale_bias (ctx, rel, 1 .0f , (k_size - 1 .0f )*k_scale ); // [q_size, k_size]
25452548 // Clamp to [0, L-1] range for valid indexing
25462549 rel = ggml_clamp (ctx, rel, 0 .0f , static_cast <float >(rel_pos->ne [1 ] - 1 ));
25472550
0 commit comments