From fb431da60182448d83e7b93ac33e98181f433468 Mon Sep 17 00:00:00 2001 From: Ethan Wang Date: Thu, 20 Nov 2025 19:33:35 +0000 Subject: [PATCH 1/5] vulkan: add get_rel_pos operation and corresponding shader * Introduced new Vulkan pipeline for get_rel_pos operation for both float and half-precision types. * Implemented the get_rel_pos compute shader to calculate relative positions based on input tensor dimensions. * Updated shader generation to include new get_rel_pos variants. --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 39 +++++++++++++++++++ .../vulkan-shaders/get_rel_pos.comp | 33 ++++++++++++++++ .../vulkan-shaders/vulkan-shaders-gen.cpp | 3 ++ 3 files changed, 75 insertions(+) create mode 100644 ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index f83dfdaef66..9460b3c59f9 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -701,6 +701,7 @@ struct vk_device_struct { vk_pipeline pipeline_rope_neox_f32, pipeline_rope_neox_f16, pipeline_rope_neox_f32_f16; vk_pipeline pipeline_rope_multi_f32, pipeline_rope_multi_f16; vk_pipeline pipeline_rope_vision_f32, pipeline_rope_vision_f16; + vk_pipeline pipeline_get_rel_pos_f32, pipeline_get_rel_pos_f16; vk_pipeline pipeline_argsort_f32[num_argsort_pipelines]; vk_pipeline pipeline_argsort_large_f32[num_argsort_pipelines]; vk_pipeline pipeline_sum_rows_f32; @@ -3933,6 +3934,9 @@ static void ggml_vk_load_shaders(vk_device& device) { ggml_vk_create_pipeline(device, device->pipeline_rope_neox_f32_f16, "rope_neox_f32_f16", rope_neox_f32_f16_len, rope_neox_f32_f16_data, "main", 5, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1); } + ggml_vk_create_pipeline(device, device->pipeline_get_rel_pos_f32, "get_rel_pos_f32", get_rel_pos_f32_len, get_rel_pos_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, { 512 }, 1); + ggml_vk_create_pipeline(device, device->pipeline_get_rel_pos_f16, "get_rel_pos_f16", get_rel_pos_f16_len, get_rel_pos_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, { 512 }, 1); + for (uint32_t i = 0; i < num_argsort_pipelines; ++i) { uint32_t BLOCK_SIZE = 1u << std::min(i, device->max_workgroup_size_log2); if (i <= device->max_workgroup_size_log2 && @@ -10019,6 +10023,32 @@ static void ggml_vk_rope(ggml_backend_vk_context * ctx, vk_context& subctx, cons ggml_vk_make_rope_constants(cgraph->nodes[node_idx], src0, src2 != nullptr, backprop, set_rows_stride)); } +static void ggml_vk_get_rel_pos(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst) { + vk_pipeline pipeline = nullptr; + switch (src0->type) { + case GGML_TYPE_F32: pipeline = ctx->device->pipeline_get_rel_pos_f32; break; + case GGML_TYPE_F16: pipeline = ctx->device->pipeline_get_rel_pos_f16; break; + default: GGML_ABORT("fatal error"); + } + GGML_ASSERT(pipeline != nullptr); + + vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst)); + init_pushconst_fastdiv(pc); + + std::array elements; + uint32_t ne = ggml_nelements(dst); + if (ne > 262144) { + elements = { 512, 512, CEIL_DIV(ne, 262144) }; + } else if (ne > 512) { + elements = { 512, CEIL_DIV(ne, 512), 1 }; + } else { + elements = { ne, 1, 1 }; + } + + ggml_pipeline_request_descriptor_sets(ctx, pipeline, 1); + ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { ggml_vk_tensor_subbuffer(ctx, src0), ggml_vk_tensor_subbuffer(ctx, dst) }, pc, elements); +} + static void ggml_vk_argsort(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst) { const uint32_t * op_params = (const uint32_t *)dst->op_params; @@ -11488,6 +11518,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr case GGML_OP_ROPE_BACK: case GGML_OP_MUL_MAT: case GGML_OP_MUL_MAT_ID: + case GGML_OP_GET_REL_POS: case GGML_OP_ARGSORT: case GGML_OP_SUM: case GGML_OP_SUM_ROWS: @@ -11817,6 +11848,10 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr case GGML_OP_ROPE_BACK: ggml_vk_rope(ctx, compute_ctx, cgraph, node_idx, true); + break; + case GGML_OP_GET_REL_POS: + ggml_vk_get_rel_pos(ctx, compute_ctx, src0, node); + break; case GGML_OP_ARGSORT: if (ctx->num_additional_fused_ops) { @@ -12006,6 +12041,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_cgraph * case GGML_OP_SOFT_MAX_BACK: case GGML_OP_ROPE: case GGML_OP_ROPE_BACK: + case GGML_OP_GET_REL_POS: case GGML_OP_RESHAPE: case GGML_OP_VIEW: case GGML_OP_PERMUTE: @@ -13964,6 +14000,9 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm return op->src[0]->type == GGML_TYPE_F32; case GGML_OP_LOG: return op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16; + case GGML_OP_GET_REL_POS: + return ggml_is_contiguous(op->src[0]) && + (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16); case GGML_OP_ARGSORT: { if (!ggml_is_contiguous(op) || !ggml_is_contiguous(op->src[0])) { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp b/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp new file mode 100644 index 00000000000..ca037dd7c82 --- /dev/null +++ b/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp @@ -0,0 +1,33 @@ +#version 450 + +#include "types.glsl" +#include "generic_unary_head.glsl" + +layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; + +void main() { + const uint idx = get_idx(); + if (idx >= p.ne) { + return; + } + + const uint i13 = fastdiv(idx, p.ne1_012mp, p.ne1_012L); + const uint i13_offset = i13 * p.ne12*p.ne11*p.ne10; + const uint i12 = fastdiv(idx - i13_offset, p.ne1_01mp, p.ne1_01L); + const uint i12_offset = i12*p.ne11*p.ne10; + const uint i11 = fastdiv(idx - i13_offset - i12_offset, p.ne1_0mp, p.ne1_0L); + const uint i10 = idx - i13_offset - i12_offset - i11*p.ne10; + + const float kh = float(p.ne11); + const float qh = float(p.ne12); + const float k_scale = max(qh / kh, 1.0f); + const float q_scale = max(kh / qh, 1.0f); + + const int pos = int(float(i12)*q_scale - float(i11)*k_scale + (kh - 1.0f)*k_scale); + + const uint src_idx = pos*p.nb01 + i10*p.nb00; + const uint dst_idx = i13*p.nb13 + i12*p.nb12 + i11*p.nb11 + i10*p.nb10; + + data_d[get_doffset() + dst_idx] = D_TYPE(data_a[get_aoffset() + src_idx]); +} + diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp index bc992068f83..3b031a2b732 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp @@ -910,6 +910,9 @@ void process_shaders() { string_to_spv("rope_vision_f16", "rope_vision.comp", {{"A_TYPE", "float16_t"}, {"ROPE_D_TYPE", "float16_t"}}); string_to_spv("rope_vision_f16_rte", "rope_vision.comp", {{"A_TYPE", "float16_t"}, {"ROPE_D_TYPE", "float16_t"}, {"RTE16", "1"}}); + string_to_spv("get_rel_pos_f32", "get_rel_pos.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}}); + string_to_spv("get_rel_pos_f16", "get_rel_pos.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}}); + string_to_spv("argsort_f32", "argsort.comp", {{"A_TYPE", "float"}}); string_to_spv("argsort_large_f32", "argsort_large.comp", {{"A_TYPE", "float"}}); From e94dc95e1677b33608c8031a3d1a25829b4d0435 Mon Sep 17 00:00:00 2001 From: Ethan Wang Date: Thu, 20 Nov 2025 12:28:00 -0800 Subject: [PATCH 2/5] vulkan: fix floating point precision issue in get_rel_pos shader * Added a small epsilon to the position calculation in the get_rel_pos compute shader to mitigate floating point precision issues. This change ensures more accurate results when computing relative positions based on input tensor dimensions. --- ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp b/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp index ca037dd7c82..3657ce8e110 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/get_rel_pos.comp @@ -23,7 +23,9 @@ void main() { const float k_scale = max(qh / kh, 1.0f); const float q_scale = max(kh / qh, 1.0f); - const int pos = int(float(i12)*q_scale - float(i11)*k_scale + (kh - 1.0f)*k_scale); + // Add a small epsilon to avoid floating point precision issues + const float epsilon = 0.0001f; + const int pos = int(float(i12) * q_scale - float(i11) * k_scale + (kh - 1.0f) * k_scale + epsilon); const uint src_idx = pos*p.nb01 + i10*p.nb00; const uint dst_idx = i13*p.nb13 + i12*p.nb12 + i11*p.nb11 + i10*p.nb10; From e6eb7db183887b5104d663859a2b3675720e1f54 Mon Sep 17 00:00:00 2001 From: Ethan Wang Date: Sun, 23 Nov 2025 21:35:27 +0000 Subject: [PATCH 3/5] vulkan: refactored the ggml_vk_get_rel_pos function to use ggml_vk_op_f32 --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 34 +++++++++------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 9460b3c59f9..5cd0b187bc3 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -8616,6 +8616,14 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const return ctx->device->pipeline_fill_f32; } return nullptr; + case GGML_OP_GET_REL_POS: + if (src0->type == GGML_TYPE_F32) { + return ctx->device->pipeline_get_rel_pos_f32; + } + if (src0->type == GGML_TYPE_F16) { + return ctx->device->pipeline_get_rel_pos_f16; + } + return nullptr; default: return nullptr; } @@ -8925,6 +8933,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co case GGML_OP_UNARY: case GGML_OP_GLU: case GGML_OP_CONV_2D_DW: + case GGML_OP_GET_REL_POS: { uint32_t ne = ggml_nelements(dst); if (op == GGML_OP_CPY && ggml_is_quantized(src0->type) && ggml_is_quantized(dst->type)) { @@ -10024,29 +10033,8 @@ static void ggml_vk_rope(ggml_backend_vk_context * ctx, vk_context& subctx, cons } static void ggml_vk_get_rel_pos(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst) { - vk_pipeline pipeline = nullptr; - switch (src0->type) { - case GGML_TYPE_F32: pipeline = ctx->device->pipeline_get_rel_pos_f32; break; - case GGML_TYPE_F16: pipeline = ctx->device->pipeline_get_rel_pos_f16; break; - default: GGML_ABORT("fatal error"); - } - GGML_ASSERT(pipeline != nullptr); - vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst)); - init_pushconst_fastdiv(pc); - - std::array elements; - uint32_t ne = ggml_nelements(dst); - if (ne > 262144) { - elements = { 512, 512, CEIL_DIV(ne, 262144) }; - } else if (ne > 512) { - elements = { 512, CEIL_DIV(ne, 512), 1 }; - } else { - elements = { ne, 1, 1 }; - } - - ggml_pipeline_request_descriptor_sets(ctx, pipeline, 1); - ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { ggml_vk_tensor_subbuffer(ctx, src0), ggml_vk_tensor_subbuffer(ctx, dst) }, pc, elements); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, nullptr, dst, GGML_OP_GET_REL_POS, pc); } static void ggml_vk_argsort(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst) { @@ -14001,7 +13989,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm case GGML_OP_LOG: return op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16; case GGML_OP_GET_REL_POS: - return ggml_is_contiguous(op->src[0]) && + return ggml_is_contiguous(op->src[0]) && ggml_vk_dim01_contiguous(op->src[0]) && (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16); case GGML_OP_ARGSORT: { From fd792734ec7fa716cead2c027dec7304c384557e Mon Sep 17 00:00:00 2001 From: Ethan Wang Date: Tue, 25 Nov 2025 23:45:04 +0000 Subject: [PATCH 4/5] vulkan: unlock incontiguous support for get_rel_pos --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 57dd6feac0d..491b87c40f4 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -8671,6 +8671,7 @@ static bool ggml_vk_op_supports_incontiguous(ggml_op op) { case GGML_OP_SUM: case GGML_OP_SUM_ROWS: case GGML_OP_MEAN: + case GGML_OP_GET_REL_POS: return true; default: return false; @@ -13760,8 +13761,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm case GGML_OP_LOG: return op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16; case GGML_OP_GET_REL_POS: - return ggml_is_contiguous(op->src[0]) && ggml_vk_dim01_contiguous(op->src[0]) && - (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16); + return (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16); case GGML_OP_ARGSORT: { if (!ggml_is_contiguous(op) || !ggml_is_contiguous(op->src[0])) { From 0cd28b69cff288da52b364bac3b2b587b6012f27 Mon Sep 17 00:00:00 2001 From: Ethan Wang Date: Wed, 26 Nov 2025 23:48:07 +0000 Subject: [PATCH 5/5] vulkan: enhance get_rel_pos handling for type checks and pipeline creation --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 491b87c40f4..c725cf66abf 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -3935,8 +3935,8 @@ static void ggml_vk_load_shaders(vk_device& device) { ggml_vk_create_pipeline(device, device->pipeline_rope_neox_f32_f16, "rope_neox_f32_f16", rope_neox_f32_f16_len, rope_neox_f32_f16_data, "main", 5, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1); } - ggml_vk_create_pipeline(device, device->pipeline_get_rel_pos_f32, "get_rel_pos_f32", get_rel_pos_f32_len, get_rel_pos_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, { 512 }, 1); - ggml_vk_create_pipeline(device, device->pipeline_get_rel_pos_f16, "get_rel_pos_f16", get_rel_pos_f16_len, get_rel_pos_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, { 512 }, 1); + ggml_vk_create_pipeline(device, device->pipeline_get_rel_pos_f32, "get_rel_pos_f32", get_rel_pos_f32_len, get_rel_pos_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_get_rel_pos_f16, "get_rel_pos_f16", get_rel_pos_f16_len, get_rel_pos_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1); for (uint32_t i = 0; i < num_argsort_pipelines; ++i) { uint32_t BLOCK_SIZE = 1u << std::min(i, device->max_workgroup_size_log2); @@ -8628,10 +8628,10 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const } return nullptr; case GGML_OP_GET_REL_POS: - if (src0->type == GGML_TYPE_F32) { + if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { return ctx->device->pipeline_get_rel_pos_f32; } - if (src0->type == GGML_TYPE_F16) { + if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) { return ctx->device->pipeline_get_rel_pos_f16; } return nullptr; @@ -13761,7 +13761,8 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm case GGML_OP_LOG: return op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16; case GGML_OP_GET_REL_POS: - return (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16); + return (op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32) || + (op->src[0]->type == GGML_TYPE_F16 && op->type == GGML_TYPE_F16); case GGML_OP_ARGSORT: { if (!ggml_is_contiguous(op) || !ggml_is_contiguous(op->src[0])) {