From 78f755fd60f2406c4c61d8dc19038b0d93e96506 Mon Sep 17 00:00:00 2001
From: J Wyman <jwyman@nvidia.com>
Date: Tue, 25 Nov 2025 15:59:49 -0500
Subject: [PATCH 1/6] feat: PyTorch PT2 Model Generation

This change adds the generation of PT2 format serialized PyTorch model files to the QA model generation scripts.
---
 .../python_based_backends/test.sh             |   1 +
 qa/common/gen_qa_model_repository             |   4 +-
 qa/common/gen_qa_models.py                    | 213 +++++++++++++++++-
 qa/common/gen_qa_torch_aoti_model.py          | 121 ++++++++++
 4 files changed, 337 insertions(+), 2 deletions(-)
 create mode 100644 qa/common/gen_qa_torch_aoti_model.py

diff --git a/qa/L0_backend_python/python_based_backends/test.sh b/qa/L0_backend_python/python_based_backends/test.sh
index c6d55d6ed3..ea9e805109 100755
--- a/qa/L0_backend_python/python_based_backends/test.sh
+++ b/qa/L0_backend_python/python_based_backends/test.sh
@@ -35,6 +35,7 @@ CLIENT_LOG="./python_based_backends_client.log"
 TEST_RESULT_FILE="./test_results.txt"
 CLIENT_PY="./python_based_backends_test.py"
 GEN_PYTORCH_MODEL_PY="../../common/gen_qa_pytorch_model.py"
+GEN_TORCHAOTI_MODEL_PY="../../common/gen_qa_torch_aoti_model.py"
 RET=0
 
 rm -rf ${MODEL_REPOSITORY}
diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
index 2edfcf325d..fa21b56ac5 100755
--- a/qa/common/gen_qa_model_repository
+++ b/qa/common/gen_qa_model_repository
@@ -238,10 +238,12 @@ pip3 install onnxscript
 set -e
 set -x
 python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --models_dir=$TRITON_MDLS_QA_MODEL
+python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --models_dir=$TRITON_MDLS_QA_MODEL
 chmod -R 777 $TRITON_MDLS_QA_MODEL
 python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL
+python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL
 chmod -R 777 $TRITON_MDLS_QA_VARIABLE_MODEL
-python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --libtorch --models_dir=$TRITON_MDLS_QA_IDENTITY_MODEL
+python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --libtorch --models_dir=$VOLUME_IDENTITYDESTDIR
 chmod -R 777 $TRITON_MDLS_QA_IDENTITY_MODEL
 python3 $TRITON_MDLS_SRC_DIR/gen_qa_reshape_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_RESHAPE_MODEL
 chmod -R 777 $TRITON_MDLS_QA_RESHAPE_MODEL
diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py
index cfce75be39..75ade739d6 100755
--- a/qa/common/gen_qa_models.py
+++ b/qa/common/gen_qa_models.py
@@ -1028,6 +1028,7 @@ def create_libtorch_modelfile(
     output1_dtype,
     swap=False,
 ):
+    print("<<<< create_libtorch_modelfile >>>>", flush=True)
     if not tu.validate_for_libtorch_model(
         input_dtype,
         output0_dtype,
@@ -1283,6 +1284,61 @@ def forward(self, INPUT0, INPUT1):
     traced.save(model_version_dir + "/model.pt")
 
 
+def create_torch_aoti_modelfile(
+    models_dir,
+    max_batch,
+    model_version,
+    input_shape,
+    output0_shape,
+    output1_shape,
+    input_dtype,
+    output0_dtype,
+    output1_dtype,
+    swap=False,
+):
+    print("<<<< create_torch_inductor_modelfile >>>>", flush=True)
+    if not tu.validate_for_libtorch_model(
+        input_dtype,
+        output0_dtype,
+        output1_dtype,
+        input_shape,
+        output0_shape,
+        output1_shape,
+        max_batch,
+    ):
+        return
+
+    model_name = tu.get_model_name(
+        "torch_aoti",
+        input_dtype,
+        output0_dtype,
+        output1_dtype,
+    )
+    # Replace -1 (variable-size) dims with 1, since a tensor can't be created with shape [-1]
+    input_shape = [abs(ips) for ips in input_shape]
+
+    model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)
+
+    try:
+        os.makedirs(model_version_dir)
+    except OSError:
+        pass  # ignore existing dir
+
+    class AddSubNet2(nn.Module):
+        def __init__(self, swap):
+            self.swap = swap
+            super(AddSubNet2, self).__init__()
+        def forward(self, INPUT0, INPUT1):
+            op0 = (INPUT0 - INPUT1) if self.swap else (INPUT0 + INPUT1)
+            op1 = (INPUT0 + INPUT1) if self.swap else (INPUT0 - INPUT1)
+            return op0, op1
+
+    ep = torch.export.export(
+        AddSubNet2(swap), (torch.randn(*input_shape), torch.randn(*input_shape))
+    )
+    torch.export.save(ep, model_version_dir + "/model.pt2")
+
+
 def create_libtorch_modelconfig(
     models_dir,
     max_batch,
@@ -1296,6 +1352,7 @@ def create_libtorch_modelconfig(
     output0_label_cnt,
     version_policy,
 ):
+    print("<<<< create_libtorch_modelconfig >>>>", flush=True)
     if not tu.validate_for_libtorch_model(
         input_dtype,
         output0_dtype,
@@ -1383,6 +1440,110 @@ def create_libtorch_modelconfig(
             lfile.write("label" + str(l) + "\n")
 
 
+def create_torch_aoti_modelconfig(
+    models_dir,
+    max_batch,
+    model_version,
+    input_shape,
+    output0_shape,
+    output1_shape,
+    input_dtype,
+    output0_dtype,
+    output1_dtype,
+    output0_label_cnt,
+    version_policy,
+):
+    print("<<<< create_torch_aoti_modelconfig >>>>", flush=True)
+    if max_batch <= 0:
+        raise ValueError("torch aot inductor model must have max_batch > 0")
+    if not tu.validate_for_libtorch_model(
+        input_dtype,
+        output0_dtype,
+        output1_dtype,
+        input_shape,
+        output0_shape,
+        output1_shape,
+        max_batch,
+    ):
+        return
+
+    # Unpack version policy
+    version_policy_str = "{ latest { num_versions: 1 }}"
+    if version_policy is not None:
+        type, val = version_policy
+        if type == "latest":
+            version_policy_str = "{{ latest {{ num_versions: {} }}}}".format(val)
+        elif type == "specific":
+            version_policy_str = "{{ specific {{ versions: {} }}}}".format(val)
+        else:
+            version_policy_str = "{ all { }}"
+
+    # Model name is derived from "torch_aoti" and the dtypes; max_batch does not affect it
+    model_name = tu.get_model_name(
+        "torch_aoti",
+        input_dtype,
+        output0_dtype,
+        output1_dtype,
+    )
+    config_dir = models_dir + "/" + model_name
+    config = """
+backend: "pytorch"
+name: "{}"
+platform: "torch_aoti"
+max_batch_size: {}
+version_policy: {}
+input [
+  {{
+    name: "INPUT0"
+    data_type: {}
+    dims: [ {} ]
+  }},
+  {{
+    name: "INPUT1"
+    data_type: {}
+    dims: [ {} ]
+  }}
+]
+output [
+  {{
+    name: "OUTPUT__0"
+    data_type: {}
+    dims: [ {} ]
+    label_filename: "output0_labels.txt"
+  }},
+  {{
+    name: "OUTPUT__1"
+    data_type: {}
+    dims: [ {} ]
+  }}
+]
+""".format(
+        model_name,
+        max_batch,
+        version_policy_str,
+        np_to_model_dtype(input_dtype),
+        tu.shape_to_dims_str(input_shape),
+        np_to_model_dtype(input_dtype),
+        tu.shape_to_dims_str(input_shape),
+        np_to_model_dtype(output0_dtype),
+        tu.shape_to_dims_str(output0_shape),
+        np_to_model_dtype(output1_dtype),
+        tu.shape_to_dims_str(output1_shape),
+    )
+
+    try:
+        os.makedirs(config_dir)
+    except OSError as ex:
+        pass  # ignore existing dir
+
+    with open(config_dir + "/config.pbtxt", "w") as cfile:
+        cfile.write(config)
+
+    with open(config_dir + "/output0_labels.txt", "w") as lfile:
+        for l in range(output0_label_cnt):
+            lfile.write("label" + str(l) + "\n")
+
+
 def create_openvino_modelfile(
     models_dir,
     max_batch,
@@ -1743,6 +1904,33 @@ def create_models(
             output1_dtype,
         )
 
+    if FLAGS.torch_aoti:
+        # max-batch 8
+        create_torch_aoti_modelconfig(
+            models_dir,
+            8,
+            model_version,
+            input_shape,
+            output0_shape,
+            output1_shape,
+            input_dtype,
+            output0_dtype,
+            output1_dtype,
+            output0_label_cnt,
+            version_policy,
+        )
+        create_torch_aoti_modelfile(
+            models_dir,
+            8,
+            model_version,
+            input_shape,
+            output0_shape,
+            output1_shape,
+            input_dtype,
+            output0_dtype,
+            output1_dtype,
+        )
+
     if FLAGS.openvino:
         # max-batch 8
         create_openvino_modelconfig(
@@ -1933,6 +2121,12 @@ def create_fixed_models(
         action="store_true",
         help="Generate Pytorch LibTorch models",
     )
+    parser.add_argument(
+        "--torch-aoti",
+        required=False,
+        action="store_true",
+        help="Generate Pytorch LibTorch models using PT2",
+    )
     parser.add_argument(
         "--openvino",
         required=False,
@@ -1959,7 +2153,7 @@ def create_fixed_models(
         import tensorrt as trt
     if FLAGS.onnx:
         import onnx
-    if FLAGS.libtorch:
+    if FLAGS.libtorch or FLAGS.torch_aoti:
         import torch
         from torch import nn
     if FLAGS.openvino:
@@ -2116,6 +2310,7 @@ def create_fixed_models(
                 create_onnx_modelfile(
                     FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
                 )
+
         if FLAGS.libtorch:
             for vt in [np.float32, np.int32, np.int16, np.int8]:
                 create_libtorch_modelfile(
@@ -2130,6 +2325,22 @@ def create_fixed_models(
                 create_libtorch_modelfile(
                     FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
                 )
+
+        if FLAGS.torch_aoti:
+            for vt in [np.float32, np.int32, np.int16, np.int8]:
+                create_torch_aoti_modelfile(
+                    FLAGS.models_dir, 8, 2, (16,), (16,), (16,), vt, vt, vt, swap=True
+                )
+                create_torch_aoti_modelfile(
+                    FLAGS.models_dir, 8, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
+                )
+                create_torch_aoti_modelfile(
+                    FLAGS.models_dir, 0, 2, (16,), (16,), (16,), vt, vt, vt, swap=True
+                )
+                create_torch_aoti_modelfile(
+                    FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
+                )
+
         if FLAGS.openvino:
             for vt in [np.float16, np.float32, np.int8, np.int16, np.int32]:
                 create_openvino_modelfile(
diff --git a/qa/common/gen_qa_torch_aoti_model.py b/qa/common/gen_qa_torch_aoti_model.py
new file mode 100644
index 0000000000..9620281c1b
--- /dev/null
+++ b/qa/common/gen_qa_torch_aoti_model.py
@@ -0,0 +1,121 @@
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import argparse
+import os
+
+import torch
+from torch import nn
+
+
+class AddSubNet(nn.Module):
+    def __init__(self):
+        super(AddSubNet, self).__init__()
+
+    def forward(self, input0, input1):
+        return (input0 + input1), (input0 - input1)
+
+
+def generate_model(model_dir):
+    model = AddSubNet()
+
+    os.makedirs(model_dir, exist_ok=True)
+    model_path = os.path.join(model_dir, "model.pt2")
+
+    ep = torch.export.export(model, (torch.randn(4), torch.randn(4)),)
+
+    torch.export.save(ep, model_path)
+
+
+def generate_config(config_path):
+    with open(f"{config_path}/config.pbtxt", "w") as f:
+        f.write(
+            """
+backend: "pytorch"
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP32
+    dims: [ 4 ]
+  }
+]
+input [
+  {
+    name: "INPUT1"
+    data_type: TYPE_FP32
+    dims: [ 4 ]
+  }
+]
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_FP32
+    dims: [ 4 ]
+  }
+]
+output [
+  {
+    name: "OUTPUT1"
+    data_type: TYPE_FP32
+    dims: [ 4 ]
+  }
+]
+"""
+        )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-m",
+        "--model-directory",
+        type=str,
+        required=True,
+        help="The path to the model repository.",
+    )
+    parser.add_argument(
+        "--model-name",
+        type=str,
+        required=False,
+        default="add_sub_pytorch",
+        help="Model name",
+    )
+    parser.add_argument(
+        "--version",
+        type=str,
+        required=False,
+        default="1",
+        help="Model version",
+    )
+
+    args = parser.parse_args()
+
+    model_directory = os.path.join(args.model_directory, args.model_name)
+    os.makedirs(model_directory, exist_ok=True)
+
+    generate_model(model_dir=os.path.join(model_directory, args.version))
+    generate_config(model_directory)

From c98a83708117cb097297a3c0e3d0ec18459ef59e Mon Sep 17 00:00:00 2001
From: J Wyman <jwyman@nvidia.com>
Date: Thu, 4 Dec 2025 13:42:54 -0500
Subject: [PATCH 2/6] add backend: pytorch to model config generation

---
 qa/common/gen_qa_models.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py
index 75ade739d6..a2ab6437d1 100755
--- a/qa/common/gen_qa_models.py
+++ b/qa/common/gen_qa_models.py
@@ -1384,6 +1384,7 @@ def create_libtorch_modelconfig(
     )
     config_dir = models_dir + "/" + model_name
     config = """
+backend: "pytorch"
 name: "{}"
 platform: "pytorch_libtorch"
 max_batch_size: {}

From 5540e489be654fc823e3c166618acabfbd57b8e3 Mon Sep 17 00:00:00 2001
From: J Wyman <jwyman@nvidia.com>
Date: Thu, 4 Dec 2025 14:40:36 -0500
Subject: [PATCH 3/6] fix pedantic style complaints and very valid copyright
 mistakes

---
 qa/L0_backend_python/python_based_backends/test.sh | 2 +-
 qa/common/gen_qa_models.py                         | 1 +
 qa/common/gen_qa_torch_aoti_model.py               | 7 +++++--
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/qa/L0_backend_python/python_based_backends/test.sh b/qa/L0_backend_python/python_based_backends/test.sh
index ea9e805109..b206feeda7 100755
--- a/qa/L0_backend_python/python_based_backends/test.sh
+++ b/qa/L0_backend_python/python_based_backends/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py
index a2ab6437d1..a92d0b406a 100755
--- a/qa/common/gen_qa_models.py
+++ b/qa/common/gen_qa_models.py
@@ -1328,6 +1328,7 @@ class AddSubNet2(nn.Module):
         def __init__(self, swap):
             self.swap = swap
             super(AddSubNet2, self).__init__()
+
         def forward(self, INPUT0, INPUT1):
             op0 = (INPUT0 - INPUT1) if self.swap else (INPUT0 + INPUT1)
             op1 = (INPUT0 + INPUT1) if self.swap else (INPUT0 - INPUT1)
diff --git a/qa/common/gen_qa_torch_aoti_model.py b/qa/common/gen_qa_torch_aoti_model.py
index 9620281c1b..ce7e97b6d7 100644
--- a/qa/common/gen_qa_torch_aoti_model.py
+++ b/qa/common/gen_qa_torch_aoti_model.py
@@ -1,4 +1,4 @@
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -46,7 +46,10 @@ def generate_model(model_dir):
     os.makedirs(model_dir, exist_ok=True)
     model_path = os.path.join(model_dir, "model.pt2")
 
-    ep = torch.export.export(model, (torch.randn(4), torch.randn(4)),)
+    ep = torch.export.export(
+      model,
+      (torch.randn(4), torch.randn(4)),
+    )
 
     torch.export.save(ep, model_path)
 

From c46edce35b78e7f713b35dbf740dbc3c726cf0e9 Mon Sep 17 00:00:00 2001
From: J Wyman <jwyman@nvidia.com>
Date: Thu, 4 Dec 2025 18:35:09 -0500
Subject: [PATCH 4/6] remove debug print statements from model generation

---
 qa/common/gen_qa_models.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py
index a92d0b406a..00131fd694 100755
--- a/qa/common/gen_qa_models.py
+++ b/qa/common/gen_qa_models.py
@@ -1028,7 +1028,6 @@ def create_libtorch_modelfile(
     output1_dtype,
     swap=False,
 ):
-    print("<<<< create_libtorch_modelfile >>>>", flush=True)
     if not tu.validate_for_libtorch_model(
         input_dtype,
         output0_dtype,
@@ -1296,7 +1295,6 @@ def create_torch_aoti_modelfile(
     output1_dtype,
     swap=False,
 ):
-    print("<<<< create_torch_inductor_modelfile >>>>", flush=True)
     if not tu.validate_for_libtorch_model(
         input_dtype,
         output0_dtype,
@@ -1353,7 +1351,6 @@ def create_libtorch_modelconfig(
     output0_label_cnt,
     version_policy,
 ):
-    print("<<<< create_libtorch_modelconfig >>>>", flush=True)
     if not tu.validate_for_libtorch_model(
         input_dtype,
         output0_dtype,
@@ -1455,7 +1452,6 @@ def create_torch_aoti_modelconfig(
     output0_label_cnt,
     version_policy,
 ):
-    print("<<<< create_torch_aoti_modelconfig >>>>", flush=True)
     if max_batch <= 0:
         raise ValueError("torch aot inductor model must have max_batch > 0")
     if not tu.validate_for_libtorch_model(

From 049480e3c00cbceab99e1424d6722fa568cc7086 Mon Sep 17 00:00:00 2001
From: J Wyman <jwyman@nvidia.com>
Date: Fri, 5 Dec 2025 18:11:02 -0500
Subject: [PATCH 5/6] pedantic style cop fix

---
 qa/common/gen_qa_torch_aoti_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qa/common/gen_qa_torch_aoti_model.py b/qa/common/gen_qa_torch_aoti_model.py
index ce7e97b6d7..76137430e7 100644
--- a/qa/common/gen_qa_torch_aoti_model.py
+++ b/qa/common/gen_qa_torch_aoti_model.py
@@ -47,8 +47,8 @@ def generate_model(model_dir):
     model_path = os.path.join(model_dir, "model.pt2")
 
     ep = torch.export.export(
-      model,
-      (torch.randn(4), torch.randn(4)),
+        model,
+        (torch.randn(4), torch.randn(4)),
     )
 
     torch.export.save(ep, model_path)

From fb4ad2e91ac78791e142b6b6dc37155ffca20511 Mon Sep 17 00:00:00 2001
From: J Wyman <jwyman@nvidia.com>
Date: Mon, 8 Dec 2025 19:13:56 -0500
Subject: [PATCH 6/6] fix variable name error after rebase

---
 qa/common/gen_qa_model_repository | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
index fa21b56ac5..6952b1ee55 100755
--- a/qa/common/gen_qa_model_repository
+++ b/qa/common/gen_qa_model_repository
@@ -243,7 +243,7 @@ chmod -R 777 $TRITON_MDLS_QA_MODEL
 python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL
 python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL
 chmod -R 777 $TRITON_MDLS_QA_VARIABLE_MODEL
-python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --libtorch --models_dir=$VOLUME_IDENTITYDESTDIR
+python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --libtorch --models_dir=$TRITON_MDLS_QA_IDENTITY_MODEL
 chmod -R 777 $TRITON_MDLS_QA_IDENTITY_MODEL
 python3 $TRITON_MDLS_SRC_DIR/gen_qa_reshape_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_RESHAPE_MODEL
 chmod -R 777 $TRITON_MDLS_QA_RESHAPE_MODEL