From 78f755fd60f2406c4c61d8dc19038b0d93e96506 Mon Sep 17 00:00:00 2001 From: J Wyman Date: Tue, 25 Nov 2025 15:59:49 -0500 Subject: [PATCH 1/6] feat: PyTorch PT2 Model Generation This change adds the generation of PT2 format serialized PyTorch model files to the QA model generation scripts. --- .../python_based_backends/test.sh | 1 + qa/common/gen_qa_model_repository | 4 +- qa/common/gen_qa_models.py | 213 +++++++++++++++++- qa/common/gen_qa_torch_aoti_model.py | 121 ++++++++++ 4 files changed, 337 insertions(+), 2 deletions(-) create mode 100644 qa/common/gen_qa_torch_aoti_model.py diff --git a/qa/L0_backend_python/python_based_backends/test.sh b/qa/L0_backend_python/python_based_backends/test.sh index c6d55d6ed3..ea9e805109 100755 --- a/qa/L0_backend_python/python_based_backends/test.sh +++ b/qa/L0_backend_python/python_based_backends/test.sh @@ -35,6 +35,7 @@ CLIENT_LOG="./python_based_backends_client.log" TEST_RESULT_FILE="./test_results.txt" CLIENT_PY="./python_based_backends_test.py" GEN_PYTORCH_MODEL_PY="../../common/gen_qa_pytorch_model.py" +GEN_TORCHAOTI_MODEL_PY="../../common/gen_qa_torch_aoti_model.py" RET=0 rm -rf ${MODEL_REPOSITORY} diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index 2edfcf325d..fa21b56ac5 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -238,10 +238,12 @@ pip3 install onnxscript set -e set -x python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --models_dir=$TRITON_MDLS_QA_MODEL +python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --models_dir=$TRITON_MDLS_QA_MODEL chmod -R 777 $TRITON_MDLS_QA_MODEL python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL +python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL chmod -R 777 $TRITON_MDLS_QA_VARIABLE_MODEL -python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --libtorch --models_dir=$TRITON_MDLS_QA_IDENTITY_MODEL +python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --libtorch --models_dir=$VOLUME_IDENTITYDESTDIR chmod -R 777 $TRITON_MDLS_QA_IDENTITY_MODEL python3 $TRITON_MDLS_SRC_DIR/gen_qa_reshape_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_RESHAPE_MODEL chmod -R 777 $TRITON_MDLS_QA_RESHAPE_MODEL diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index cfce75be39..75ade739d6 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -1028,6 +1028,7 @@ def create_libtorch_modelfile( output1_dtype, swap=False, ): + print("<<<< create_libtorch_modelfile >>>>", flush=True) if not tu.validate_for_libtorch_model( input_dtype, output0_dtype, @@ -1283,6 +1284,61 @@ def forward(self, INPUT0, INPUT1): traced.save(model_version_dir + "/model.pt") +def create_torch_aoti_modelfile( + models_dir, + max_batch, + model_version, + input_shape, + output0_shape, + output1_shape, + input_dtype, + output0_dtype, + output1_dtype, + swap=False, +): + print("<<<< create_torch_inductor_modelfile >>>>", flush=True) + if not tu.validate_for_libtorch_model( + input_dtype, + output0_dtype, + output1_dtype, + input_shape, + output0_shape, + output1_shape, + max_batch, + ): + return + + model_name = tu.get_model_name( + "torch_aoti", + input_dtype, + output0_dtype, + output1_dtype, + ) + # handle for -1 (when variable) since can't create tensor with shape of [-1] + input_shape = [abs(ips) for ips in input_shape] + + model_version_dir = models_dir + "/" + model_name + "/" + str(model_version) + + try: + os.makedirs(model_version_dir) + except OSError: + pass # ignore existing dir + + class AddSubNet2(nn.Module): + def __init__(self, swap): + self.swap = swap + super(AddSubNet2, self).__init__() + def forward(self, INPUT0, INPUT1): + op0 = (INPUT0 - INPUT1) if self.swap else (INPUT0 + INPUT1) + op1 = (INPUT0 + INPUT1) if self.swap else (INPUT0 - INPUT1) + return op0, op1 + + ep = torch.export.export( + AddSubNet2(swap), (torch.randn(*input_shape), torch.randn(*input_shape)) + ) + torch.export.save(ep, model_version_dir + "/model.pt2") + + def create_libtorch_modelconfig( models_dir, max_batch, @@ -1296,6 +1352,7 @@ def create_libtorch_modelconfig( output0_label_cnt, version_policy, ): + print("<<<< create_libtorch_modelconfig >>>>", flush=True) if not tu.validate_for_libtorch_model( input_dtype, output0_dtype, @@ -1383,6 +1440,110 @@ def create_libtorch_modelconfig( lfile.write("label" + str(l) + "\n") +def create_torch_aoti_modelconfig( + models_dir, + max_batch, + model_version, + input_shape, + output0_shape, + output1_shape, + input_dtype, + output0_dtype, + output1_dtype, + output0_label_cnt, + version_policy, +): + print("<<<< create_torch_aoti_modelconfig >>>>", flush=True) + if max_batch <= 0: + raise ValueError("torch aot inductor model must have max_batch > 0") + if not tu.validate_for_libtorch_model( + input_dtype, + output0_dtype, + output1_dtype, + input_shape, + output0_shape, + output1_shape, + max_batch, + ): + return + + # Unpack version policy + version_policy_str = "{ latest { num_versions: 1 }}" + if version_policy is not None: + type, val = version_policy + if type == "latest": + version_policy_str = "{{ latest {{ num_versions: {} }}}}".format(val) + elif type == "specific": + version_policy_str = "{{ specific {{ versions: {} }}}}".format(val) + else: + version_policy_str = "{ all { }}" + + # Use a different model name for the non-batching variant + model_name = tu.get_model_name( + "torch_aoti", + input_dtype, + output0_dtype, + output1_dtype, + ) + config_dir = models_dir + "/" + model_name + config = """ +backend: "pytorch" +name: "{}" +platform: "torch_aoti" +max_batch_size: {} +version_policy: {} +input [ + {{ + name: "INPUT0" + data_type: {} + dims: [ {} ] + }}, + {{ + name: "INPUT1" + data_type: {} + dims: [ {} ] + }} +] +output [ + {{ + name: "OUTPUT__0" + data_type: {} + dims: [ {} ] + label_filename: "output0_labels.txt" + }}, + {{ + name: "OUTPUT__1" + data_type: {} + dims: [ {} ] + }} +] +""".format( + model_name, + max_batch, + version_policy_str, + np_to_model_dtype(input_dtype), + tu.shape_to_dims_str(input_shape), + np_to_model_dtype(input_dtype), + tu.shape_to_dims_str(input_shape), + np_to_model_dtype(output0_dtype), + tu.shape_to_dims_str(output0_shape), + np_to_model_dtype(output1_dtype), + tu.shape_to_dims_str(output1_shape), + ) + + try: + os.makedirs(config_dir) + except OSError as ex: + pass # ignore existing dir + + with open(config_dir + "/config.pbtxt", "w") as cfile: + cfile.write(config) + + with open(config_dir + "/output0_labels.txt", "w") as lfile: + for l in range(output0_label_cnt): + lfile.write("label" + str(l) + "\n") + + def create_openvino_modelfile( models_dir, max_batch, @@ -1743,6 +1904,33 @@ def create_models( output1_dtype, ) + if FLAGS.torch_aoti: + # max-batch 8 + create_torch_aoti_modelconfig( + models_dir, + 8, + model_version, + input_shape, + output0_shape, + output1_shape, + input_dtype, + output0_dtype, + output1_dtype, + output0_label_cnt, + version_policy, + ) + create_torch_aoti_modelfile( + models_dir, + 8, + model_version, + input_shape, + output0_shape, + output1_shape, + input_dtype, + output0_dtype, + output1_dtype, + ) + if FLAGS.openvino: # max-batch 8 create_openvino_modelconfig( @@ -1933,6 +2121,12 @@ def create_fixed_models( action="store_true", help="Generate Pytorch LibTorch models", ) + parser.add_argument( + "--torch-aoti", + required=False, + action="store_true", + help="Generate Pytorch LibTorch models using PT2", + ) parser.add_argument( "--openvino", required=False, @@ -1959,7 +2153,7 @@ def create_fixed_models( import tensorrt as trt if FLAGS.onnx: import onnx - if FLAGS.libtorch: + if FLAGS.libtorch or FLAGS.torch_aoti: import torch from torch import nn if FLAGS.openvino: @@ -2116,6 +2310,7 @@ def create_fixed_models( create_onnx_modelfile( FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True ) + if FLAGS.libtorch: for vt in [np.float32, np.int32, np.int16, np.int8]: create_libtorch_modelfile( @@ -2130,6 +2325,22 @@ def create_fixed_models( create_libtorch_modelfile( FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True ) + + if FLAGS.torch_aoti: + for vt in [np.float32, np.int32, np.int16, np.int8]: + create_torch_aoti_modelfile( + FLAGS.models_dir, 8, 2, (16,), (16,), (16,), vt, vt, vt, swap=True + ) + create_torch_aoti_modelfile( + FLAGS.models_dir, 8, 3, (16,), (16,), (16,), vt, vt, vt, swap=True + ) + create_torch_aoti_modelfile( + FLAGS.models_dir, 0, 2, (16,), (16,), (16,), vt, vt, vt, swap=True + ) + create_torch_aoti_modelfile( + FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True + ) + if FLAGS.openvino: for vt in [np.float16, np.float32, np.int8, np.int16, np.int32]: create_openvino_modelfile( diff --git a/qa/common/gen_qa_torch_aoti_model.py b/qa/common/gen_qa_torch_aoti_model.py new file mode 100644 index 0000000000..9620281c1b --- /dev/null +++ b/qa/common/gen_qa_torch_aoti_model.py @@ -0,0 +1,121 @@ +# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +import argparse +import os + +import torch +from torch import nn + + +class AddSubNet(nn.Module): + def __init__(self): + super(AddSubNet, self).__init__() + + def forward(self, input0, input1): + return (input0 + input1), (input0 - input1) + + +def generate_model(model_dir): + model = AddSubNet() + + os.makedirs(model_dir, exist_ok=True) + model_path = os.path.join(model_dir, "model.pt2") + + ep = torch.export.export(model, (torch.randn(4), torch.randn(4)),) + + torch.export.save(ep, model_path) + + +def generate_config(config_path): + with open(f"{config_path}/config.pbtxt", "w") as f: + f.write( + """ +backend: "pytorch" +input [ + { + name: "INPUT0" + data_type: TYPE_FP32 + dims: [ 4 ] + } +] +input [ + { + name: "INPUT1" + data_type: TYPE_FP32 + dims: [ 4 ] + } +] +output [ + { + name: "OUTPUT0" + data_type: TYPE_FP32 + dims: [ 4 ] + } +] +output [ + { + name: "OUTPUT1" + data_type: TYPE_FP32 + dims: [ 4 ] + } +] +""" + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "-m", + "--model-directory", + type=str, + required=True, + help="The path to the model repository.", + ) + parser.add_argument( + "--model-name", + type=str, + required=False, + default="add_sub_pytorch", + help="Model name", + ) + parser.add_argument( + "--version", + type=str, + required=False, + default="1", + help="Model version", + ) + + args = parser.parse_args() + + model_directory = os.path.join(args.model_directory, args.model_name) + os.makedirs(model_directory, exist_ok=True) + + generate_model(model_dir=os.path.join(model_directory, args.version)) + generate_config(model_directory) From c98a83708117cb097297a3c0e3d0ec18459ef59e Mon Sep 17 00:00:00 2001 From: J Wyman Date: Thu, 4 Dec 2025 13:42:54 -0500 Subject: [PATCH 2/6] add backend: pytorch to model config generation --- qa/common/gen_qa_models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index 75ade739d6..a2ab6437d1 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -1384,6 +1384,7 @@ def create_libtorch_modelconfig( ) config_dir = models_dir + "/" + model_name config = """ +backend: "pytorch" name: "{}" platform: "pytorch_libtorch" max_batch_size: {} From 5540e489be654fc823e3c166618acabfbd57b8e3 Mon Sep 17 00:00:00 2001 From: J Wyman Date: Thu, 4 Dec 2025 14:40:36 -0500 Subject: [PATCH 3/6] fix pedantic style complaints and very valid copyright mistakes --- qa/L0_backend_python/python_based_backends/test.sh | 2 +- qa/common/gen_qa_models.py | 1 + qa/common/gen_qa_torch_aoti_model.py | 7 +++++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/qa/L0_backend_python/python_based_backends/test.sh b/qa/L0_backend_python/python_based_backends/test.sh index ea9e805109..b206feeda7 100755 --- a/qa/L0_backend_python/python_based_backends/test.sh +++ b/qa/L0_backend_python/python_based_backends/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index a2ab6437d1..a92d0b406a 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -1328,6 +1328,7 @@ class AddSubNet2(nn.Module): def __init__(self, swap): self.swap = swap super(AddSubNet2, self).__init__() + def forward(self, INPUT0, INPUT1): op0 = (INPUT0 - INPUT1) if self.swap else (INPUT0 + INPUT1) op1 = (INPUT0 + INPUT1) if self.swap else (INPUT0 - INPUT1) diff --git a/qa/common/gen_qa_torch_aoti_model.py b/qa/common/gen_qa_torch_aoti_model.py index 9620281c1b..ce7e97b6d7 100644 --- a/qa/common/gen_qa_torch_aoti_model.py +++ b/qa/common/gen_qa_torch_aoti_model.py @@ -1,4 +1,4 @@ -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -46,7 +46,10 @@ def generate_model(model_dir): os.makedirs(model_dir, exist_ok=True) model_path = os.path.join(model_dir, "model.pt2") - ep = torch.export.export(model, (torch.randn(4), torch.randn(4)),) + ep = torch.export.export( + model, + (torch.randn(4), torch.randn(4)), + ) torch.export.save(ep, model_path) From c46edce35b78e7f713b35dbf740dbc3c726cf0e9 Mon Sep 17 00:00:00 2001 From: J Wyman Date: Thu, 4 Dec 2025 18:35:09 -0500 Subject: [PATCH 4/6] WIP --- qa/common/gen_qa_models.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index a92d0b406a..00131fd694 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -1028,7 +1028,6 @@ def create_libtorch_modelfile( output1_dtype, swap=False, ): - print("<<<< create_libtorch_modelfile >>>>", flush=True) if not tu.validate_for_libtorch_model( input_dtype, output0_dtype, @@ -1296,7 +1295,6 @@ def create_torch_aoti_modelfile( output1_dtype, swap=False, ): - print("<<<< create_torch_inductor_modelfile >>>>", flush=True) if not tu.validate_for_libtorch_model( input_dtype, output0_dtype, @@ -1353,7 +1351,6 @@ def create_libtorch_modelconfig( output0_label_cnt, version_policy, ): - print("<<<< create_libtorch_modelconfig >>>>", flush=True) if not tu.validate_for_libtorch_model( input_dtype, output0_dtype, @@ -1455,7 +1452,6 @@ def create_torch_aoti_modelconfig( output0_label_cnt, version_policy, ): - print("<<<< create_torch_aoti_modelconfig >>>>", flush=True) if max_batch <= 0: raise ValueError("torch aot inductor model must have max_batch > 0") if not tu.validate_for_libtorch_model( From 049480e3c00cbceab99e1424d6722fa568cc7086 Mon Sep 17 00:00:00 2001 From: J Wyman Date: Fri, 5 Dec 2025 18:11:02 -0500 Subject: [PATCH 5/6] pedantic style cop fix --- qa/common/gen_qa_torch_aoti_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qa/common/gen_qa_torch_aoti_model.py b/qa/common/gen_qa_torch_aoti_model.py index ce7e97b6d7..76137430e7 100644 --- a/qa/common/gen_qa_torch_aoti_model.py +++ b/qa/common/gen_qa_torch_aoti_model.py @@ -47,8 +47,8 @@ def generate_model(model_dir): model_path = os.path.join(model_dir, "model.pt2") ep = torch.export.export( - model, - (torch.randn(4), torch.randn(4)), + model, + (torch.randn(4), torch.randn(4)), ) torch.export.save(ep, model_path) From fb4ad2e91ac78791e142b6b6dc37155ffca20511 Mon Sep 17 00:00:00 2001 From: J Wyman Date: Mon, 8 Dec 2025 19:13:56 -0500 Subject: [PATCH 6/6] fix variable name error after rebase --- qa/common/gen_qa_model_repository | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index fa21b56ac5..6952b1ee55 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -243,7 +243,7 @@ chmod -R 777 $TRITON_MDLS_QA_MODEL python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL chmod -R 777 $TRITON_MDLS_QA_VARIABLE_MODEL -python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --libtorch --models_dir=$VOLUME_IDENTITYDESTDIR +python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --libtorch --models_dir=$TRITON_MDLS_QA_IDENTITY_MODEL chmod -R 777 $TRITON_MDLS_QA_IDENTITY_MODEL python3 $TRITON_MDLS_SRC_DIR/gen_qa_reshape_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_RESHAPE_MODEL chmod -R 777 $TRITON_MDLS_QA_RESHAPE_MODEL