3 changes: 2 additions & 1 deletion qa/L0_backend_python/python_based_backends/test.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -35,6 +35,7 @@ CLIENT_LOG="./python_based_backends_client.log"
TEST_RESULT_FILE="./test_results.txt"
CLIENT_PY="./python_based_backends_test.py"
GEN_PYTORCH_MODEL_PY="../../common/gen_qa_pytorch_model.py"
GEN_TORCHAOTI_MODEL_PY="../../common/gen_qa_torch_aoti_model.py"
RET=0

rm -rf ${MODEL_REPOSITORY}
2 changes: 2 additions & 0 deletions qa/common/gen_qa_model_repository
@@ -238,8 +238,10 @@ pip3 install onnxscript
set -e
set -x
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --models_dir=$TRITON_MDLS_QA_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --models_dir=$TRITON_MDLS_QA_MODEL
chmod -R 777 $TRITON_MDLS_QA_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL
chmod -R 777 $TRITON_MDLS_QA_VARIABLE_MODEL
python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --libtorch --models_dir=$TRITON_MDLS_QA_IDENTITY_MODEL
chmod -R 777 $TRITON_MDLS_QA_IDENTITY_MODEL
211 changes: 210 additions & 1 deletion qa/common/gen_qa_models.py
@@ -1283,6 +1283,61 @@ def forward(self, INPUT0, INPUT1):
traced.save(model_version_dir + "/model.pt")


def create_torch_aoti_modelfile(
models_dir,
max_batch,
model_version,
input_shape,
output0_shape,
output1_shape,
input_dtype,
output0_dtype,
output1_dtype,
swap=False,
):
if not tu.validate_for_libtorch_model(
input_dtype,
output0_dtype,
output1_dtype,
input_shape,
output0_shape,
output1_shape,
max_batch,
):
return

model_name = tu.get_model_name(
"torch_aoti",
input_dtype,
output0_dtype,
output1_dtype,
)
# Replace -1 dims (variable-shape case) since a tensor cannot be created with a shape of [-1]
input_shape = [abs(ips) for ips in input_shape]

model_version_dir = models_dir + "/" + model_name + "/" + str(model_version)

try:
os.makedirs(model_version_dir)
except OSError:
pass # ignore existing dir

class AddSubNet2(nn.Module):
def __init__(self, swap):
super(AddSubNet2, self).__init__()
self.swap = swap

def forward(self, INPUT0, INPUT1):
op0 = (INPUT0 - INPUT1) if self.swap else (INPUT0 + INPUT1)
op1 = (INPUT0 + INPUT1) if self.swap else (INPUT0 - INPUT1)
return op0, op1

# Example input dtypes are baked into the exported graph, so they must
# match the dtype declared in the model config (not always float32).
example = torch.from_numpy(np.zeros(input_shape, dtype=input_dtype))
ep = torch.export.export(AddSubNet2(swap), (example, example.clone()))
torch.export.save(ep, model_version_dir + "/model.pt2")
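
# For a quick sanity check, the saved exported program can be reloaded and
# run directly (an illustrative sketch only, not part of the QA flow;
# float32 inputs assumed here):
#
#   ep = torch.export.load(model_version_dir + "/model.pt2")
#   out0, out1 = ep.module()(torch.randn(16), torch.randn(16))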


def create_libtorch_modelconfig(
models_dir,
max_batch,
@@ -1327,6 +1382,7 @@ def create_libtorch_modelconfig(
)
config_dir = models_dir + "/" + model_name
config = """
backend: "pytorch"
name: "{}"
platform: "pytorch_libtorch"
max_batch_size: {}
@@ -1383,6 +1439,109 @@ def create_libtorch_modelconfig(
lfile.write("label" + str(l) + "\n")


def create_torch_aoti_modelconfig(
models_dir,
max_batch,
model_version,
input_shape,
output0_shape,
output1_shape,
input_dtype,
output0_dtype,
output1_dtype,
output0_label_cnt,
version_policy,
):
if max_batch <= 0:
raise ValueError("torch_aoti (AOTInductor) models must have max_batch > 0")
if not tu.validate_for_libtorch_model(
input_dtype,
output0_dtype,
output1_dtype,
input_shape,
output0_shape,
output1_shape,
max_batch,
):
return

# Unpack version policy
version_policy_str = "{ latest { num_versions: 1 }}"
if version_policy is not None:
type, val = version_policy
if type == "latest":
version_policy_str = "{{ latest {{ num_versions: {} }}}}".format(val)
elif type == "specific":
version_policy_str = "{{ specific {{ versions: {} }}}}".format(val)
else:
version_policy_str = "{ all { }}"

# torch_aoti models always batch (max_batch > 0 is enforced above), so a single model name is used
model_name = tu.get_model_name(
"torch_aoti",
input_dtype,
output0_dtype,
output1_dtype,
)
config_dir = models_dir + "/" + model_name
config = """
backend: "pytorch"
name: "{}"
platform: "torch_aoti"
max_batch_size: {}
version_policy: {}
input [
{{
name: "INPUT0"
data_type: {}
dims: [ {} ]
}},
{{
name: "INPUT1"
data_type: {}
dims: [ {} ]
}}
]
output [
{{
name: "OUTPUT__0"
data_type: {}
dims: [ {} ]
label_filename: "output0_labels.txt"
}},
{{
name: "OUTPUT__1"
data_type: {}
dims: [ {} ]
}}
]
""".format(
model_name,
max_batch,
version_policy_str,
np_to_model_dtype(input_dtype),
tu.shape_to_dims_str(input_shape),
np_to_model_dtype(input_dtype),
tu.shape_to_dims_str(input_shape),
np_to_model_dtype(output0_dtype),
tu.shape_to_dims_str(output0_shape),
np_to_model_dtype(output1_dtype),
tu.shape_to_dims_str(output1_shape),
)

try:
os.makedirs(config_dir)
except OSError:
pass # ignore existing dir

with open(config_dir + "/config.pbtxt", "w") as cfile:
cfile.write(config)

with open(config_dir + "/output0_labels.txt", "w") as lfile:
for l in range(output0_label_cnt):
lfile.write("label" + str(l) + "\n")
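
# For illustration, with float32 I/O this function is expected to produce a
# layout like the following (model name format assumed from tu.get_model_name):
#
#   <models_dir>/torch_aoti_float32_float32_float32/config.pbtxt
#   <models_dir>/torch_aoti_float32_float32_float32/output0_labels.txt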


def create_openvino_modelfile(
models_dir,
max_batch,
@@ -1743,6 +1902,33 @@ def create_models(
output1_dtype,
)

if FLAGS.torch_aoti:
# max-batch 8
create_torch_aoti_modelconfig(
models_dir,
8,
model_version,
input_shape,
output0_shape,
output1_shape,
input_dtype,
output0_dtype,
output1_dtype,
output0_label_cnt,
version_policy,
)
create_torch_aoti_modelfile(
models_dir,
8,
model_version,
input_shape,
output0_shape,
output1_shape,
input_dtype,
output0_dtype,
output1_dtype,
)

if FLAGS.openvino:
# max-batch 8
create_openvino_modelconfig(
@@ -1933,6 +2119,12 @@ def create_fixed_models(
action="store_true",
help="Generate Pytorch LibTorch models",
)
parser.add_argument(
"--torch-aoti",
required=False,
action="store_true",
help="Generate Pytorch LibTorch models using PT2",
)
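# Example invocation with the new flag, mirroring gen_qa_model_repository
# above (the models_dir path is illustrative):
#
#   python3 gen_qa_models.py --torch-aoti --models_dir=/tmp/qa_model_repository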
parser.add_argument(
"--openvino",
required=False,
@@ -1959,7 +2151,7 @@ def create_fixed_models(
import tensorrt as trt
if FLAGS.onnx:
import onnx
if FLAGS.libtorch:
if FLAGS.libtorch or FLAGS.torch_aoti:
import torch
from torch import nn
if FLAGS.openvino:
@@ -2116,6 +2308,7 @@ def create_fixed_models(
create_onnx_modelfile(
FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
)

if FLAGS.libtorch:
for vt in [np.float32, np.int32, np.int16, np.int8]:
create_libtorch_modelfile(
@@ -2130,6 +2323,22 @@ def create_fixed_models(
create_libtorch_modelfile(
FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
)

if FLAGS.torch_aoti:
for vt in [np.float32, np.int32, np.int16, np.int8]:
create_torch_aoti_modelfile(
FLAGS.models_dir, 8, 2, (16,), (16,), (16,), vt, vt, vt, swap=True
)
create_torch_aoti_modelfile(
FLAGS.models_dir, 8, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
)
create_torch_aoti_modelfile(
FLAGS.models_dir, 0, 2, (16,), (16,), (16,), vt, vt, vt, swap=True
)
create_torch_aoti_modelfile(
FLAGS.models_dir, 0, 3, (16,), (16,), (16,), vt, vt, vt, swap=True
)

if FLAGS.openvino:
for vt in [np.float16, np.float32, np.int8, np.int16, np.int32]:
create_openvino_modelfile(