Commit 3bbb02c

ok fix the spec issues at the source
1 parent d18aa78 commit 3bbb02c

File tree

3 files changed: +117 -158 lines changed


exir/pass_base.py

Lines changed: 14 additions & 5 deletions
@@ -554,16 +554,25 @@ def call_scan(
         self,
         combine_fn: torch.fx.GraphModule,
         init: List[ProxyValue],
-        xs: List[ProxyValue],
+        xs: List[Argument],
         additional_inputs: List[ProxyValue],
         meta: NodeMetadata,
     ) -> ProxyValue:
-        xs_first_slice = _unstack_pytree([arg.data for arg in xs])[0]
-        init_data = [arg.data for arg in init]
-        additional_data = [arg.data for arg in additional_inputs]
+        # Get the expected x element shapes from the combine_fn's placeholders.
+        # The combine_fn expects: (carry..., x_element..., additional_inputs...)
+        combine_fn_placeholders = [
+            n for n in combine_fn.graph.nodes if n.op == "placeholder"
+        ]
+        num_init = len(init)
+        # The x_element placeholders are at indices [num_init : num_init + num_xs]
+        xs_element_data = []
+        for i, x_proxy in enumerate(xs):
+            ph = combine_fn_placeholders[num_init + i]
+            # Use the placeholder's val which has the correct shape
+            xs_element_data.append(ph.meta["val"])
 
         combine_fn_result = self.call_submodule(
-            combine_fn, tuple(init_data + xs_first_slice + additional_data)
+            combine_fn, (*init, *xs_element_data, *additional_inputs)
         )
         assert combine_fn_result is not None
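The change works because export records a fake tensor under `meta["val"]` on each placeholder of the traced `combine_fn`, and those fake tensors already carry the per-element (sliced) shapes the submodule expects, so the pass no longer needs to unstack the `xs` operands. A minimal sketch of that lookup pattern; the helper name below is hypothetical and not part of this commit:

```python
import torch


def placeholder_fake_inputs(gm: torch.fx.GraphModule):
    """Hypothetical helper: collect the fake ("val") tensors recorded on a
    GraphModule's placeholder nodes.

    Each placeholder carries the shape the submodule was traced with, so
    slicing this list gives per-element example inputs without unstacking
    the actual scan operands.
    """
    return [
        n.meta["val"]
        for n in gm.graph.nodes
        if n.op == "placeholder" and "val" in n.meta
    ]


# For a scan combine_fn traced as (carry..., x_element..., additional...),
# the first len(init) entries are the carry shapes and the next len(xs)
# entries are the per-element x shapes.
```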

exir/passes/spec_prop_pass.py

Lines changed: 0 additions & 153 deletions
@@ -12,7 +12,6 @@
 import torch
 from executorch.exir.delegate import executorch_call_delegate
 from executorch.exir.pass_base import ExportPass, NodeMetadata, ProxyValue
-from executorch.exir.schema import TensorShapeDynamism
 from executorch.exir.tensor import TensorSpec
 from torch.export.exported_program import ExportGraphSignature
 from torch.fx.node import Node
@@ -60,18 +59,15 @@ def __call__(self, graph_module: torch.fx.GraphModule) -> PassResult:
         res = ExportPass()(graph_module)
         assert res is not None
         gm = res.graph_module
-
         def get_spec(x):
             if hasattr(x, "meta"):
                 return x.meta.get("spec", None)
             else:
                 return None
-
         for module in gm.modules():
             if isinstance(module, torch.fx.GraphModule):
                 for node in module.graph.nodes:
                     meta_val = node.meta.get("val", None)
-
                     if node.op == "output":
                         node.meta["spec"] = pytree.tree_map(get_spec, node.args[0])
                     elif node.op == "call_function" and node.target == operator.getitem:
@@ -123,152 +119,3 @@ def update_placeholder_tensor_specs(
                 in exported_program.graph_signature.inputs_to_lifted_tensor_constants
             ):
                 spec.const = True
-
-    # pyre-ignore
-    def placeholder(self, name: str, arg, meta):
-        meta["spec"] = make_spec(arg)
-        return super().placeholder(name, arg, meta)
-
-    # pyre-ignore
-    def call_operator(self, op, args, kwargs, meta):
-        args_data, kwargs_data = pytree.tree_map_only(
-            ProxyValue, lambda x: x.data, (args, kwargs)
-        )
-        meta["spec"] = pytree.tree_map(make_spec, op(*args_data, **kwargs_data))
-        return super().call_operator(op, args, kwargs, meta)
-
-    # pyre-ignore
-    def call_getitem(self, value, key: int, meta):
-        meta["spec"] = value.node.meta["spec"][key]
-        return super().call_getitem(value, key, meta)
-
-    # pyre-ignore
-    def call_cond(self, pred, true_fn, false_fn, inputs, meta):
-        # true_fn/false_fn return tensors of the same shape, so we can pick
-        # either one here.
-        *_, true_out_node = true_fn.graph.nodes
-        meta["spec"] = pytree.tree_map(make_spec, true_out_node.meta["val"])
-        return super().call_cond(pred, true_fn, false_fn, inputs, meta)
-
-    def call_while(
-        self,
-        cond_fn: torch.fx.GraphModule,
-        body_fn: torch.fx.GraphModule,
-        carried_inputs: List[ProxyValue],
-        additional_inputs: List[ProxyValue],
-        meta: NodeMetadata,
-    ):
-        meta["spec"] = pytree.tree_map(make_spec, carried_inputs)
-        return super().call_while(
-            cond_fn, body_fn, carried_inputs, additional_inputs, meta
-        )
-
-    def call_map(
-        self,
-        f: torch.fx.GraphModule,
-        mapped_args: List[ProxyValue],
-        operands: List[ProxyValue],
-        meta: NodeMetadata,
-    ) -> ProxyValue:
-        mapped_dim_size = [arg.data for arg in mapped_args][0].size(0)
-        *_, body_out_node = f.graph.nodes
-        body_out_node_fake_tensor = body_out_node.meta["val"]
-
-        # For dynamic shapes, initialize with size 0 in the mapped dimension.
-        # The et_copy_index op will resize as it writes to each index.
-        # Check if the mapped dimension is symbolic (dynamic).
-        is_dynamic = isinstance(mapped_dim_size, torch.SymInt)
-        init_size = 0 if is_dynamic else mapped_dim_size
-
-        map_fake_tensor = pytree.tree_map_only(
-            torch.Tensor,
-            lambda x: x.new_empty(init_size, *x.shape),
-            body_out_node_fake_tensor,
-        )
-        meta["spec"] = pytree.tree_map(make_spec, map_fake_tensor)
-        return super().call_map(f, mapped_args, operands, meta)
-
-    def call_scan(
-        self,
-        combine_fn: torch.fx.GraphModule,
-        init: List[ProxyValue],
-        xs: List[ProxyValue],
-        additional_inputs: List[ProxyValue],
-        meta: NodeMetadata,
-    ) -> ProxyValue:
-        # Get the scan length - this may be symbolic for dynamic shapes
-        xs_tensor = [arg.data for arg in xs][0]
-        scan_length = xs_tensor.size(0)
-
-        *_, body_out_node = combine_fn.graph.nodes
-        body_out_fake = body_out_node.meta["val"]
-
-        num_carry = len(init)
-        flat_body_out, out_spec = pytree.tree_flatten(body_out_fake)
-
-        carry_out = flat_body_out[:num_carry]
-        y_out = flat_body_out[num_carry:]
-
-        # Check if the scan dimension is symbolic (dynamic)
-        is_dynamic = isinstance(scan_length, torch.SymInt)
-
-        # For the y outputs, we need to use the upper bound size to allocate memory,
-        # but also mark the tensor spec as DYNAMIC_BOUND so it can be resized at runtime.
-        if is_dynamic:
-            # Get the upper bound by evaluating the symbolic int
-            # Using hint gives us the concrete upper bound value
-            upper_bound_size = scan_length.node.shape_env.size_hint(
-                scan_length.node.expr
-            )
-        else:
-            upper_bound_size = scan_length
-
-        carry_fake = carry_out
-        y_fake = [
-            (
-                x.new_empty(upper_bound_size, *x.shape)
-                if isinstance(x, torch.Tensor)
-                else x
-            )
-            for x in y_out
-        ]
-
-        combined_fake = carry_fake + y_fake
-
-        # Create specs from the fake tensors
-        specs = pytree.tree_map(make_spec, combined_fake)
-
-        # For dynamic shapes, mark the y_output specs as DYNAMIC_BOUND
-        # so that et_copy_index can resize them at runtime
-        if is_dynamic and isinstance(specs, list):
-            for i in range(num_carry, len(specs)):
-                if isinstance(specs[i], TensorSpec):
-                    specs[i].shape_dynamism = TensorShapeDynamism.DYNAMIC_BOUND
-
-        meta["spec"] = specs
-        return super().call_scan(combine_fn, init, xs, additional_inputs, meta)
-
-    # pyre-ignore
-    def call_delegate(self, lowered_module, args, kwargs, meta):
-        args_data, kwargs_data = pytree.tree_map_only(
-            ProxyValue, lambda x: x.data, (args, kwargs)
-        )
-        # If spec is missing, re-genenrate it with args data
-        if "spec" not in meta:
-            meta["spec"] = pytree.tree_map(
-                make_spec,
-                executorch_call_delegate(lowered_module, *args_data),
-            )
-        return super().call_delegate(lowered_module, args, kwargs, meta)
-
-    # pyre-ignore
-    def output(self, results, meta):
-        # pyre-ignore
-        def get_spec(x):
-            if isinstance(x, ProxyValue):
-                return x.node.meta["spec"]
-            else:
-                return make_spec(x)
-
-        meta["spec"] = pytree.tree_map(get_spec, results)
-        return super().output(results, meta)
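All of the higher-order-op spec overrides above (`call_cond`, `call_while`, `call_map`, `call_scan`, `call_delegate`, `output`, ...) are deleted; per the commit message, the shape information is now expected to be correct in each node's `val` metadata at the source, so specs can be derived generically. A rough sketch of that generic derivation, assuming a `make_spec`-style helper that turns a fake tensor into a spec object; this is illustrative, not the exact ExecuTorch implementation:

```python
import torch
from torch.utils import _pytree as pytree


def specs_from_meta_val(gm: torch.fx.GraphModule, make_spec):
    """Illustrative sketch: derive a per-node spec pytree from node.meta["val"].

    make_spec is assumed to map a single fake tensor (or scalar) to a spec.
    Everything HOP-specific (cond/while/map/scan output shapes) is already
    encoded in the "val" metadata, so no per-op handling is needed.
    """
    for node in gm.graph.nodes:
        meta_val = node.meta.get("val", None)
        if meta_val is not None:
            node.meta["spec"] = pytree.tree_map(make_spec, meta_val)
    return gm
```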

exir/tests/test_passes.py

Lines changed: 103 additions & 0 deletions
@@ -745,6 +745,109 @@ def loop_body(i, acc):
         upper_bound = eval_upper_bound(spec[1].shape[0])
         self.assertEqual(upper_bound, 10)
 
+    def test_spec_prop_pass_scan(self) -> None:
+        from torch._higher_order_ops.scan import scan
+
+        class ModelWithScan(torch.nn.Module):
+            def forward(self, xs: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+                def combine_fn(carry, x):
+                    new_carry = carry + x
+                    return new_carry, new_carry.clone()
+
+                init = torch.zeros_like(xs[0])
+                return scan(combine_fn, init, xs)
+
+        model = ModelWithScan()
+        inputs = (torch.arange(15).float().reshape(5, 3),)
+
+        # Run the spec prop pass and sanity check the spec on the scan.
+        edge_program = to_edge(
+            export(model, inputs, strict=True),
+            compile_config=EdgeCompileConfig(_check_ir_validity=False),
+        )
+        gm = edge_program.exported_program().graph_module
+        new_gm = SpecPropPass()(gm)
+        self.assertIsNotNone(new_gm)
+
+        # Check the spec for the scan node.
+        scan_node = next(
+            n
+            for n in new_gm.graph_module.graph.nodes
+            if hasattr(n.target, "name") and n.target.name() == "scan"
+        )
+        self.assertIsNotNone(scan_node)
+
+        # Spec for the scan node should be a two-element tuple (carry, stacked_outputs)
+        spec: Tuple[TensorSpec, TensorSpec] = scan_node.meta["spec"]
+        self.assertTrue(isinstance(spec, tuple))
+        self.assertEqual(len(spec), 2)
+
+        # Carry should have shape [3] (same as xs[0])
+        self.assertEqual(list(spec[0].shape), [3])
+        # Stacked outputs should have shape [5, 3] (same as xs)
+        self.assertEqual(list(spec[1].shape), [5, 3])
+
+    def test_spec_prop_pass_scan_dynamic_shape(self) -> None:
+        from torch._higher_order_ops.scan import scan
+
+        class ModelWithScan(torch.nn.Module):
+            def forward(self, xs: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+                def combine_fn(carry, x):
+                    new_carry = carry + x
+                    return new_carry, new_carry.clone()
+
+                init = torch.zeros_like(xs[0])
+                return scan(combine_fn, init, xs)
+
+        model = ModelWithScan()
+        inputs = (torch.arange(15).float().reshape(5, 3),)
+        dynamic_shapes = {"xs": {0: torch.export.Dim("seq_len", min=1, max=20)}}
+
+        # First verify that export preserves symbolic shapes
+        exported = export(model, inputs, dynamic_shapes=dynamic_shapes, strict=True)
+        scan_node_after_export = next(
+            n
+            for n in exported.graph.nodes
+            if hasattr(n.target, "name") and n.target.name() == "scan"
+        )
+        val_after_export = scan_node_after_export.meta.get("val")
+        self.assertIsNotNone(val_after_export)
+        # After export, the stacked output should have a symbolic first dimension
+        self.assertIsInstance(val_after_export[1].shape[0], torch.SymInt)
+
+        # Run the spec prop pass and sanity check the spec on the scan.
+        edge_program = to_edge(
+            exported,
+            compile_config=EdgeCompileConfig(_check_ir_validity=False),
+        )
+        gm = edge_program.exported_program().graph_module
+        new_gm = SpecPropPass()(gm)
+        self.assertIsNotNone(new_gm)
+
+        # Check the spec for the scan node.
+        scan_node = next(
+            n
+            for n in new_gm.graph_module.graph.nodes
+            if hasattr(n.target, "name") and n.target.name() == "scan"
+        )
+        self.assertIsNotNone(scan_node)
+
+        # Spec for the scan node should be a two-element tuple (carry, stacked_outputs)
+        spec: Tuple[TensorSpec, TensorSpec] = scan_node.meta["spec"]
+        self.assertTrue(isinstance(spec, tuple))
+        self.assertEqual(len(spec), 2)
+
+        # Carry should have static shape [3]
+        self.assertEqual(list(spec[0].shape), [3])
+        self.assertEqual(spec[0].shape_dynamism, TensorShapeDynamism.STATIC)
+
+        # Stacked outputs should have dynamic first dimension with upper bound 20
+        self.assertEqual(len(spec[1].shape), 2)
+        upper_bound = eval_upper_bound(spec[1].shape[0])
+        self.assertEqual(upper_bound, 20)
+        self.assertEqual(spec[1].shape_dynamism, TensorShapeDynamism.DYNAMIC_BOUND)
+        self.assertEqual(spec[1].shape[1], 3)  # Second dim is static
+
     def test_compile_fix_broken_ops(self) -> None:
         class ExportableLoop(nn.Module):
             def __init__(self, hidden_size, out_channels):
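For context on the shapes these tests expect: `scan(combine_fn, init, xs)` threads a carry along the leading dimension of `xs` and stacks the second output of `combine_fn` at every step, so the carry keeps the shape of `init` (`[3]`) while the stacked output gains `xs`'s leading dimension in front (`[5, 3]`, or a bounded dynamic length in the second test). A plain-loop sketch of the same semantics, independent of this commit:

```python
import torch


def scan_reference(combine_fn, init, xs):
    """Plain-loop equivalent of scan(combine_fn, init, xs) for a single tensor xs.

    combine_fn(carry, x) -> (new_carry, y); the final carry has the shape of
    init, and the stacked ys have shape [xs.shape[0], *y.shape].
    """
    carry, ys = init, []
    for x in xs:  # iterate over the leading (scan) dimension
        carry, y = combine_fn(carry, x)
        ys.append(y)
    return carry, torch.stack(ys)


xs = torch.arange(15).float().reshape(5, 3)
carry, stacked = scan_reference(
    lambda c, x: (c + x, (c + x).clone()), torch.zeros_like(xs[0]), xs
)
assert carry.shape == (3,) and stacked.shape == (5, 3)
```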
