From 2b09423238fd076b73b593595da15afff68c992c Mon Sep 17 00:00:00 2001
From: ssjia <ssjia@devvm1479.ncg0.facebook.com>
Date: Wed, 10 Dec 2025 13:36:54 -0800
Subject: [PATCH 1/2] [ez][ET-VK] Small fix for choose_qparams_affine_impl

It seems that some arguments were recently appended to the `choose_qparams_affine` schema. This causes newly exported models to break because, at runtime, the output arg can no longer be found.

Fix by locating the output argument as the last entry in the args vector, rather than continuously incrementing the args index.

Update the quantize/dequantize ops as well, since it seems that ops in the `quantized_decomposed` namespace are subject to change in the future.

Note that it would be good to do this for all operators in the Vulkan backend as a later refactor.

Differential Revision: [D88887463](https://our.internmc.facebook.com/intern/diff/D88887463/)

[ghstack-poisoned]
---
 .../vulkan/runtime/graph/ops/impl/ChooseQParams.cpp    |  6 ++++--
 .../runtime/graph/ops/impl/QuantizeDequantize.cpp      | 10 ++++++----
 2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/backends/vulkan/runtime/graph/ops/impl/ChooseQParams.cpp b/backends/vulkan/runtime/graph/ops/impl/ChooseQParams.cpp
index a36660e0aca..5b8615e0a70 100644
--- a/backends/vulkan/runtime/graph/ops/impl/ChooseQParams.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/ChooseQParams.cpp
@@ -158,7 +158,8 @@ bool can_use_choose_qparams_per_row(
 void choose_qparams_affine_impl(
     ComputeGraph& graph,
     const std::vector<ValueRef>& args) {
-  int arg_idx = 0;
+  size_t arg_idx = 0;
+  size_t last_arg_idx = args.size() - 1;
   const ValueRef input = args[arg_idx++];
   const ValueRef mapping_type = args[arg_idx++];
   (void)mapping_type;
@@ -170,7 +171,8 @@ void choose_qparams_affine_impl(
   (void)eps;
   const ValueRef scale_dtype = args[arg_idx++];
   const ValueRef zero_point_dtype = args[arg_idx++];
-  const ValueRef out_tuple_ref = args[arg_idx++];
+
+  const ValueRef out_tuple_ref = args[last_arg_idx];
 
   // Suppress unused variable warnings
   (void)target_dtype;
diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizeDequantize.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizeDequantize.cpp
index ee8f8a1afb4..8ebbf6dcb99 100644
--- a/backends/vulkan/runtime/graph/ops/impl/QuantizeDequantize.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/QuantizeDequantize.cpp
@@ -369,7 +369,8 @@ void add_unpack_4w4c_and_dequantize_node(
 void quantize_per_tensor_impl(
     ComputeGraph& graph,
     const std::vector<ValueRef>& args) {
-  int32_t arg_idx = 0;
+  size_t arg_idx = 0;
+  size_t last_arg_idx = args.size() - 1;
   const ValueRef fp_input = args[arg_idx++];
   const ValueRef scale = args[arg_idx++];
   const ValueRef zero_point = args[arg_idx++];
@@ -380,7 +381,7 @@ void quantize_per_tensor_impl(
   const ValueRef dtype = args[arg_idx++];
   (void)dtype;
 
-  const ValueRef int8_output = args[arg_idx++];
+  const ValueRef int8_output = args[last_arg_idx];
 
   VK_CHECK_COND(
       graph.estimate_memory_layout_of(int8_output) == utils::kPackedInt8_4W4C);
@@ -392,7 +393,8 @@ void quantize_per_tensor_impl(
 void dequantize_per_tensor_impl(
     ComputeGraph& graph,
     const std::vector<ValueRef>& args) {
-  int32_t arg_idx = 0;
+  size_t arg_idx = 0;
+  size_t last_arg_idx = args.size() - 1;
   const ValueRef int8_input = args[arg_idx++];
   const ValueRef scale = args[arg_idx++];
   const ValueRef zero_point = args[arg_idx++];
@@ -405,7 +407,7 @@ void dequantize_per_tensor_impl(
   const ValueRef output_dtype = args[arg_idx++];
   (void)output_dtype;
 
-  const ValueRef fp_output = args[arg_idx++];
+  const ValueRef fp_output = args[last_arg_idx];
 
   VK_CHECK_COND(
       graph.estimate_memory_layout_of(int8_input) == utils::kPackedInt8_4W4C);

From c9abd591ae6480e096e6647616014c96554a3466 Mon Sep 17 00:00:00 2001
From: ssjia <ssjia@devvm1479.ncg0.facebook.com>
Date: Wed, 10 Dec 2025 14:05:29 -0800
Subject: [PATCH 2/2] Update on "[ez][ET-VK] Small fix for
 choose_qparams_affine_impl"

It seems that some arguments were recently appended to the `choose_qparams_affine` schema. This causes newly exported models to break because, at runtime, the output arg can no longer be found.

Fix by locating the output argument as the last entry in the args vector, rather than continuously incrementing the args index.

Update the quantize/dequantize ops as well, since it seems that ops in the `quantized_decomposed` namespace are subject to change in the future.

Note that it would be good to do this for all operators in the Vulkan backend as a later refactor.

Differential Revision: [D88887463](https://our.internmc.facebook.com/intern/diff/D88887463/)

[ghstack-poisoned]