[ez][ET-VK] Small fix for choose_qparams_affine_impl

ssjia · ssjia · commit 2b09423238fd · 2025-12-10T13:36:54.000-08:00
It seems that the schema of `choose_qparams_affine` has recently had some arguments appended to it. This causes newly exported models to break because, at runtime, the output arg can no longer be found at the position the implementation expects. Fix this by reading the output argument from the last entry of the args vector, rather than from the next position reached by sequentially incrementing the args index. Update the quantize/dequantize ops in the same way, since it seems that ops in the `quantized_decomposed` namespace are subject to change in the future. Note that it would be good to apply the same approach to all operators in the Vulkan backend as a later refactor. Differential Revision: [D88887463](https://our.internmc.facebook.com/intern/diff/D88887463/) [ghstack-poisoned]
diff --git a/backends/vulkan/runtime/graph/ops/impl/ChooseQParams.cpp b/backends/vulkan/runtime/graph/ops/impl/ChooseQParams.cpp
@@ -158,7 +158,8 @@ bool can_use_choose_qparams_per_row(
 void choose_qparams_affine_impl(
     ComputeGraph& graph,
     const std::vector<ValueRef>& args) {
-  int arg_idx = 0;
+  size_t arg_idx = 0;
+  size_t last_arg_idx = args.size() - 1;
   const ValueRef input = args[arg_idx++];
   const ValueRef mapping_type = args[arg_idx++];
   (void)mapping_type;
@@ -170,7 +171,8 @@ void choose_qparams_affine_impl(
   (void)eps;
   const ValueRef scale_dtype = args[arg_idx++];
   const ValueRef zero_point_dtype = args[arg_idx++];
-  const ValueRef out_tuple_ref = args[arg_idx++];
+
+  const ValueRef out_tuple_ref = args[last_arg_idx];
 
   // Suppress unused variable warnings
   (void)target_dtype;
diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizeDequantize.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizeDequantize.cpp
@@ -369,7 +369,8 @@ void add_unpack_4w4c_and_dequantize_node(
 void quantize_per_tensor_impl(
     ComputeGraph& graph,
     const std::vector<ValueRef>& args) {
-  int32_t arg_idx = 0;
+  size_t arg_idx = 0;
+  size_t last_arg_idx = args.size() - 1;
   const ValueRef fp_input = args[arg_idx++];
   const ValueRef scale = args[arg_idx++];
   const ValueRef zero_point = args[arg_idx++];
@@ -380,7 +381,7 @@ void quantize_per_tensor_impl(
   const ValueRef dtype = args[arg_idx++];
   (void)dtype;
 
-  const ValueRef int8_output = args[arg_idx++];
+  const ValueRef int8_output = args[last_arg_idx];
 
   VK_CHECK_COND(
       graph.estimate_memory_layout_of(int8_output) == utils::kPackedInt8_4W4C);
@@ -392,7 +393,8 @@ void quantize_per_tensor_impl(
 void dequantize_per_tensor_impl(
     ComputeGraph& graph,
     const std::vector<ValueRef>& args) {
-  int32_t arg_idx = 0;
+  size_t arg_idx = 0;
+  size_t last_arg_idx = args.size() - 1;
   const ValueRef int8_input = args[arg_idx++];
   const ValueRef scale = args[arg_idx++];
   const ValueRef zero_point = args[arg_idx++];
@@ -405,7 +407,7 @@ void dequantize_per_tensor_impl(
   const ValueRef output_dtype = args[arg_idx++];
   (void)output_dtype;
 
-  const ValueRef fp_output = args[arg_idx++];
+  const ValueRef fp_output = args[last_arg_idx];
 
   VK_CHECK_COND(
       graph.estimate_memory_layout_of(int8_input) == utils::kPackedInt8_4W4C);