From fb431da60182448d83e7b93ac33e98181f433468 Mon Sep 17 00:00:00 2001
From: Ethan Wang <ethan@nexa.ai>
Date: Thu, 20 Nov 2025 19:33:35 +0000
Subject: [PATCH 1/5] vulkan: add get_rel_pos operation and corresponding
 shader

* Introduced new Vulkan pipeline for get_rel_pos operation for both float and half-precision types.
* Implemented the get_rel_pos compute shader to calculate relative positions based on input tensor dimensions.
* Updated shader generation to include new get_rel_pos variants.
---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp          | 39 +++++++++++++++++++
 .../vulkan-shaders/get_rel_pos.comp           | 33 ++++++++++++++++
 .../vulkan-shaders/vulkan-shaders-gen.cpp     |  3 ++
 3 files changed, 75 insertions(+)
 create mode 100644 ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index f83dfdaef66..9460b3c59f9 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -701,6 +701,7 @@ struct vk_device_struct {
     vk_pipeline pipeline_rope_neox_f32, pipeline_rope_neox_f16, pipeline_rope_neox_f32_f16;
     vk_pipeline pipeline_rope_multi_f32, pipeline_rope_multi_f16;
     vk_pipeline pipeline_rope_vision_f32, pipeline_rope_vision_f16;
+    vk_pipeline pipeline_get_rel_pos_f32, pipeline_get_rel_pos_f16;
     vk_pipeline pipeline_argsort_f32[num_argsort_pipelines];
     vk_pipeline pipeline_argsort_large_f32[num_argsort_pipelines];
     vk_pipeline pipeline_sum_rows_f32;
@@ -3933,6 +3934,9 @@ static void ggml_vk_load_shaders(vk_device& device) {
         ggml_vk_create_pipeline(device, device->pipeline_rope_neox_f32_f16, "rope_neox_f32_f16", rope_neox_f32_f16_len, rope_neox_f32_f16_data, "main", 5, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
     }
 
+    ggml_vk_create_pipeline(device, device->pipeline_get_rel_pos_f32, "get_rel_pos_f32", get_rel_pos_f32_len, get_rel_pos_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, { 512 }, 1);
+    ggml_vk_create_pipeline(device, device->pipeline_get_rel_pos_f16, "get_rel_pos_f16", get_rel_pos_f16_len, get_rel_pos_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, { 512 }, 1);
+
     for (uint32_t i = 0; i < num_argsort_pipelines; ++i) {
         uint32_t BLOCK_SIZE = 1u << std::min(i, device->max_workgroup_size_log2);
         if (i <= device->max_workgroup_size_log2 &&
@@ -10019,6 +10023,32 @@ static void ggml_vk_rope(ggml_backend_vk_context * ctx, vk_context& subctx, cons
         ggml_vk_make_rope_constants(cgraph->nodes[node_idx], src0, src2 != nullptr, backprop, set_rows_stride));
 }
 
+static void ggml_vk_get_rel_pos(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst) {
+    vk_pipeline pipeline = nullptr;
+    switch (src0->type) {
+        case GGML_TYPE_F32: pipeline = ctx->device->pipeline_get_rel_pos_f32; break;
+        case GGML_TYPE_F16: pipeline = ctx->device->pipeline_get_rel_pos_f16; break;
+        default: GGML_ABORT("fatal error");
+    }
+    GGML_ASSERT(pipeline != nullptr);
+
+    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst));
+    init_pushconst_fastdiv(pc);
+
+    std::array<uint32_t, 3> elements;
+    uint32_t ne = ggml_nelements(dst);
+    if (ne > 262144) {
+        elements = { 512, 512, CEIL_DIV(ne, 262144) };
+    } else if (ne > 512) {
+        elements = { 512, CEIL_DIV(ne, 512), 1 };
+    } else {
+        elements = { ne, 1, 1 };
+    }
+
+    ggml_pipeline_request_descriptor_sets(ctx, pipeline, 1);
+    ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { ggml_vk_tensor_subbuffer(ctx, src0), ggml_vk_tensor_subbuffer(ctx, dst) }, pc, elements);
+}
+
 static void ggml_vk_argsort(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst) {
     const uint32_t * op_params = (const uint32_t *)dst->op_params;
 
@@ -11488,6 +11518,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr
     case GGML_OP_ROPE_BACK:
     case GGML_OP_MUL_MAT:
     case GGML_OP_MUL_MAT_ID:
+    case GGML_OP_GET_REL_POS:
     case GGML_OP_ARGSORT:
     case GGML_OP_SUM:
     case GGML_OP_SUM_ROWS:
@@ -11817,6 +11848,10 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr
     case GGML_OP_ROPE_BACK:
         ggml_vk_rope(ctx, compute_ctx, cgraph, node_idx, true);
 
+        break;
+    case GGML_OP_GET_REL_POS:
+        ggml_vk_get_rel_pos(ctx, compute_ctx, src0, node);
+
         break;
     case GGML_OP_ARGSORT:
         if (ctx->num_additional_fused_ops) {
@@ -12006,6 +12041,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_cgraph *
     case GGML_OP_SOFT_MAX_BACK:
     case GGML_OP_ROPE:
     case GGML_OP_ROPE_BACK:
+    case GGML_OP_GET_REL_POS:
     case GGML_OP_RESHAPE:
     case GGML_OP_VIEW:
     case GGML_OP_PERMUTE:
@@ -13964,6 +14000,9 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
             return op->src[0]->type == GGML_TYPE_F32;
         case GGML_OP_LOG:
             return op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16;
+        case GGML_OP_GET_REL_POS:
+            return ggml_is_contiguous(op->src[0]) &&
+                   (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16);
         case GGML_OP_ARGSORT:
             {
                 if (!ggml_is_contiguous(op) || !ggml_is_contiguous(op->src[0])) {
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp b/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp
new file mode 100644
index 00000000000..ca037dd7c82
--- /dev/null
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp
@@ -0,0 +1,33 @@
+#version 450
+
+#include "types.glsl"
+#include "generic_unary_head.glsl"
+
+layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
+
+void main() {
+    const uint idx = get_idx();
+    if (idx >= p.ne) {
+        return;
+    }
+
+    const uint i13 = fastdiv(idx, p.ne1_012mp, p.ne1_012L);
+    const uint i13_offset = i13 * p.ne12*p.ne11*p.ne10;
+    const uint i12 = fastdiv(idx - i13_offset, p.ne1_01mp, p.ne1_01L);
+    const uint i12_offset = i12*p.ne11*p.ne10;
+    const uint i11 = fastdiv(idx - i13_offset - i12_offset, p.ne1_0mp, p.ne1_0L);
+    const uint i10 = idx - i13_offset - i12_offset - i11*p.ne10;
+
+    const float kh = float(p.ne11);
+    const float qh = float(p.ne12);
+    const float k_scale = max(qh / kh, 1.0f);
+    const float q_scale = max(kh / qh, 1.0f);
+
+    const int pos = int(float(i12)*q_scale - float(i11)*k_scale + (kh - 1.0f)*k_scale);
+
+    const uint src_idx = pos*p.nb01 + i10*p.nb00;
+    const uint dst_idx = i13*p.nb13 + i12*p.nb12 + i11*p.nb11 + i10*p.nb10;
+
+    data_d[get_doffset() + dst_idx] = D_TYPE(data_a[get_aoffset() + src_idx]);
+}
+
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
index bc992068f83..3b031a2b732 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@@ -910,6 +910,9 @@ void process_shaders() {
     string_to_spv("rope_vision_f16", "rope_vision.comp", {{"A_TYPE", "float16_t"}, {"ROPE_D_TYPE", "float16_t"}});
     string_to_spv("rope_vision_f16_rte", "rope_vision.comp", {{"A_TYPE", "float16_t"}, {"ROPE_D_TYPE", "float16_t"}, {"RTE16", "1"}});
 
+    string_to_spv("get_rel_pos_f32", "get_rel_pos.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
+    string_to_spv("get_rel_pos_f16", "get_rel_pos.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
+
     string_to_spv("argsort_f32", "argsort.comp", {{"A_TYPE", "float"}});
     string_to_spv("argsort_large_f32", "argsort_large.comp", {{"A_TYPE", "float"}});
 

From e94dc95e1677b33608c8031a3d1a25829b4d0435 Mon Sep 17 00:00:00 2001
From: Ethan Wang <ethan@nexa.ai>
Date: Thu, 20 Nov 2025 12:28:00 -0800
Subject: [PATCH 2/5] vulkan: fix floating point precision issue in get_rel_pos
 shader

* Added a small epsilon to the position calculation in the get_rel_pos compute shader to mitigate floating point precision issues. This change ensures more accurate results when computing relative positions based on input tensor dimensions.
---
 ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp b/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp
index ca037dd7c82..3657ce8e110 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp
@@ -23,7 +23,9 @@ void main() {
     const float k_scale = max(qh / kh, 1.0f);
     const float q_scale = max(kh / qh, 1.0f);
 
-    const int pos = int(float(i12)*q_scale - float(i11)*k_scale + (kh - 1.0f)*k_scale);
+    // Add a small epsilon to avoid floating point precision issues
+    const float epsilon = 0.0001f;
+    const int pos = int(float(i12) * q_scale - float(i11) * k_scale + (kh - 1.0f) * k_scale + epsilon);
 
     const uint src_idx = pos*p.nb01 + i10*p.nb00;
     const uint dst_idx = i13*p.nb13 + i12*p.nb12 + i11*p.nb11 + i10*p.nb10;

From e6eb7db183887b5104d663859a2b3675720e1f54 Mon Sep 17 00:00:00 2001
From: Ethan Wang <ethan@nexa.ai>
Date: Sun, 23 Nov 2025 21:35:27 +0000
Subject: [PATCH 3/5] vulkan: refactored the ggml_vk_get_rel_pos function to
 use ggml_vk_op_f32

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 34 +++++++++-------------------
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 9460b3c59f9..5cd0b187bc3 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -8616,6 +8616,14 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
             return ctx->device->pipeline_fill_f32;
         }
         return nullptr;
+    case GGML_OP_GET_REL_POS:
+        if (src0->type == GGML_TYPE_F32) {
+            return ctx->device->pipeline_get_rel_pos_f32;
+        }
+        if (src0->type == GGML_TYPE_F16) {
+            return ctx->device->pipeline_get_rel_pos_f16;
+        }
+        return nullptr;
     default:
         return nullptr;
     }
@@ -8925,6 +8933,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
     case GGML_OP_UNARY:
     case GGML_OP_GLU:
     case GGML_OP_CONV_2D_DW:
+    case GGML_OP_GET_REL_POS:
         {
             uint32_t ne = ggml_nelements(dst);
             if (op == GGML_OP_CPY && ggml_is_quantized(src0->type) && ggml_is_quantized(dst->type)) {
@@ -10024,29 +10033,8 @@ static void ggml_vk_rope(ggml_backend_vk_context * ctx, vk_context& subctx, cons
 }
 
 static void ggml_vk_get_rel_pos(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst) {
-    vk_pipeline pipeline = nullptr;
-    switch (src0->type) {
-        case GGML_TYPE_F32: pipeline = ctx->device->pipeline_get_rel_pos_f32; break;
-        case GGML_TYPE_F16: pipeline = ctx->device->pipeline_get_rel_pos_f16; break;
-        default: GGML_ABORT("fatal error");
-    }
-    GGML_ASSERT(pipeline != nullptr);
-
     vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst));
-    init_pushconst_fastdiv(pc);
-
-    std::array<uint32_t, 3> elements;
-    uint32_t ne = ggml_nelements(dst);
-    if (ne > 262144) {
-        elements = { 512, 512, CEIL_DIV(ne, 262144) };
-    } else if (ne > 512) {
-        elements = { 512, CEIL_DIV(ne, 512), 1 };
-    } else {
-        elements = { ne, 1, 1 };
-    }
-
-    ggml_pipeline_request_descriptor_sets(ctx, pipeline, 1);
-    ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { ggml_vk_tensor_subbuffer(ctx, src0), ggml_vk_tensor_subbuffer(ctx, dst) }, pc, elements);
+    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, nullptr, dst, GGML_OP_GET_REL_POS, pc);
 }
 
 static void ggml_vk_argsort(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst) {
@@ -14001,7 +13989,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
         case GGML_OP_LOG:
             return op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16;
         case GGML_OP_GET_REL_POS:
-            return ggml_is_contiguous(op->src[0]) &&
+            return ggml_is_contiguous(op->src[0]) && ggml_vk_dim01_contiguous(op->src[0]) &&
                    (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16);
         case GGML_OP_ARGSORT:
             {

From fd792734ec7fa716cead2c027dec7304c384557e Mon Sep 17 00:00:00 2001
From: Ethan Wang <ethan@nexa.ai>
Date: Tue, 25 Nov 2025 23:45:04 +0000
Subject: [PATCH 4/5] vulkan: unlock incontiguous support for get_rel_pos

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 57dd6feac0d..491b87c40f4 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -8671,6 +8671,7 @@ static bool ggml_vk_op_supports_incontiguous(ggml_op op) {
     case GGML_OP_SUM:
     case GGML_OP_SUM_ROWS:
     case GGML_OP_MEAN:
+    case GGML_OP_GET_REL_POS:
         return true;
     default:
         return false;
@@ -13760,8 +13761,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
         case GGML_OP_LOG:
             return op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16;
         case GGML_OP_GET_REL_POS:
-            return ggml_is_contiguous(op->src[0]) && ggml_vk_dim01_contiguous(op->src[0]) &&
-                   (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16);
+            return (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16);
         case GGML_OP_ARGSORT:
             {
                 if (!ggml_is_contiguous(op) || !ggml_is_contiguous(op->src[0])) {

From 0cd28b69cff288da52b364bac3b2b587b6012f27 Mon Sep 17 00:00:00 2001
From: Ethan Wang <ethan@nexa.ai>
Date: Wed, 26 Nov 2025 23:48:07 +0000
Subject: [PATCH 5/5] vulkan: enhance get_rel_pos handling for type checks and
 pipeline creation

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 491b87c40f4..c725cf66abf 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -3935,8 +3935,8 @@ static void ggml_vk_load_shaders(vk_device& device) {
         ggml_vk_create_pipeline(device, device->pipeline_rope_neox_f32_f16, "rope_neox_f32_f16", rope_neox_f32_f16_len, rope_neox_f32_f16_data, "main", 5, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
     }
 
-    ggml_vk_create_pipeline(device, device->pipeline_get_rel_pos_f32, "get_rel_pos_f32", get_rel_pos_f32_len, get_rel_pos_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, { 512 }, 1);
-    ggml_vk_create_pipeline(device, device->pipeline_get_rel_pos_f16, "get_rel_pos_f16", get_rel_pos_f16_len, get_rel_pos_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, { 512 }, 1);
+    ggml_vk_create_pipeline(device, device->pipeline_get_rel_pos_f32, "get_rel_pos_f32", get_rel_pos_f32_len, get_rel_pos_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
+    ggml_vk_create_pipeline(device, device->pipeline_get_rel_pos_f16, "get_rel_pos_f16", get_rel_pos_f16_len, get_rel_pos_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
 
     for (uint32_t i = 0; i < num_argsort_pipelines; ++i) {
         uint32_t BLOCK_SIZE = 1u << std::min(i, device->max_workgroup_size_log2);
@@ -8628,10 +8628,10 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
         }
         return nullptr;
     case GGML_OP_GET_REL_POS:
-        if (src0->type == GGML_TYPE_F32) {
+        if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
             return ctx->device->pipeline_get_rel_pos_f32;
         }
-        if (src0->type == GGML_TYPE_F16) {
+        if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) {
             return ctx->device->pipeline_get_rel_pos_f16;
         }
         return nullptr;
@@ -13761,7 +13761,8 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
         case GGML_OP_LOG:
             return op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16;
         case GGML_OP_GET_REL_POS:
-            return (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16);
+            return (op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32) ||
+                   (op->src[0]->type == GGML_TYPE_F16 && op->type == GGML_TYPE_F16);
         case GGML_OP_ARGSORT:
             {
                 if (!ggml_is_contiguous(op) || !ggml_is_contiguous(op->src[0])) {