|
7 | 7 | import numpy as np |
8 | 8 |
|
9 | 9 | def blockwise_conv1d(op, node, **kwargs): |
10 | | - if op.core_op.mode != "valid": |
11 | | - raise NotImplementedError("Only 'valid' mode is supported for conv1d") |
12 | | - # batches_ndim = op.batch_ndim(node) |
13 | | - # if batches_ndim != 1: |
14 | | - # raise NotImplementedError("Only 1D batches are supported for conv1d") |
15 | 12 |
|
16 | | - # _, kernel = node.inputs |
17 | | - # if not all(kernel.type.broadcastable[:batches_ndim]): |
18 | | - # raise NotImplementedError("Only 1D batches are supported for conv1d") |
19 | 38 |
|
20 | | - def inner_f(x, kernel): |
| 39 | + def batched_conv1d( |
| 40 | + x: mx.array, |
| 41 | + kernels: mx.array, |
| 42 | + mode: str = op.core_op.mode, |
| 43 | + stride: int = 1, |
| 44 | + dilation: int = 1) -> mx.array: |
| 45 | + """ |
| 46 | + Apply B separate 1D convolutions (full or valid) to B sequences in parallel. |
| 47 | +
|
| 48 | + Parameters |
| 49 | + ---------- |
| 50 | + x : array of shape (B, T) |
| 51 | + B sequences of length T. |
| 52 | + kernels : array of shape (B, K) |
| 53 | + B kernels of length K. |
| 54 | + mode : {"valid", "full"} |
| 55 | + "valid" → no padding, output length = T - K + 1 |
| 56 | + "full" → zero‑pad so output length = T + K - 1 |
| 57 | + stride : int, convolution stride (default=1) |
| 58 | + dilation : int, convolution dilation (default=1) |
| 59 | +
|
| 60 | + Returns |
| 61 | + ------- |
| 62 | + out : array of shape (B, L) |
| 63 | + where L = |
| 64 | + - T - K + 1 if mode="valid" |
| 65 | + - T + K - 1 if mode="full" |
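| | + |
| | + Examples |
| | + -------- |
| | + A minimal usage sketch; shapes here are illustrative only: |
| | + |
| | + >>> x = mx.ones((4, 10)) # B=4 sequences of length T=10 |
| | + >>> k = mx.ones((4, 3)) # B=4 kernels of length K=3 |
| | + >>> batched_conv1d(x, k, mode="valid").shape # (4, 8): T - K + 1 |
| | + >>> batched_conv1d(x, k, mode="full").shape # (4, 12): T + K - 1 |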
| 66 | + """ |
| 67 | + # --- 1) shape checks --- |
21 | 68 | B, T = x.shape |
22 | | - Bk, K = kernel.shape |
| 69 | + Bk, K = kernels.shape |
23 | 70 | if B != Bk: |
24 | 71 | raise ValueError(f"Batch mismatch: x has {B}, kernels has {Bk}") |
25 | 72 |
|
26 | | - # 1) Flip each kernel for true convolution |
27 | | - kernels_flipped = kernel[:, ::-1] # shape (B, K) |
| 73 | + # --- 2) flip kernels for convolution --- |
| 74 | + kernels_flipped = kernels[:, ::-1] # shape (B, K) |
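| | + # (mx.conv1d, like most deep-learning conv ops, computes |
| | + # cross-correlation, so flipping each kernel yields true convolution) |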
| 75 | + |
| 76 | + # --- 3) decide padding --- |
| 77 | + if mode == "valid": |
| 78 | + pad = 0 |
| 79 | + elif mode == "full": |
| 80 | + pad = (K - 1) * dilation |
| 81 | + else: |
| 82 | + raise ValueError(f"Unsupported mode {mode!r}: choose 'valid' or 'full'") |
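| | + # (with stride 1, H_out = T + 2*pad - dilation*(K-1), so |
| | + # pad = (K-1)*dilation gives H_out = T + (K-1)*dilation, |
| | + # i.e. T + K - 1 for dilation=1, the "full" output length) |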
28 | 83 |
|
29 | | - # 2) Reshape input into (N=1, H=T, C_in=B) |
30 | | - x_in = x.T[None, :, :] |
| 84 | + # --- 4) reshape into MLX conv1d form --- |
| 85 | + # input: (N=1, H=T, C_in=B) |
| 86 | + x_in = x.T[None, :, :] |
31 | 87 |
|
32 | | - # 3) Build weight tensor of shape (C_out=B, H_f=K, C_in=1) |
33 | | - w = kernels_flipped[:, :, None] |
| 88 | + # weight: (C_out=B, H_f=K, C_in=1) |
| 89 | + w = kernels_flipped[:, :, None] |
34 | 90 |
|
35 | | - # 4) Convolve with one group per channel → valid mode |
| 91 | + # --- 5) run grouped conv1d --- |
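| | + # groups=B pairs output channel b with input channel b only, so one |
| | + # call computes B independent single-channel convolutions |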
36 | 92 | y = mx.conv1d( |
37 | 93 | x_in, w, |
38 | | - stride=1, |
39 | | - padding=0, |
40 | | - dilation=1, |
| 94 | + stride=stride, |
| 95 | + padding=pad, |
| 96 | + dilation=dilation, |
41 | 97 | groups=B |
42 | 98 | ) |
43 | | - # y: (1, T-K+1, B) → drop batch and transpose to (B, T-K+1) |
| 99 | + # y shape: (1, H_out, B) |
| 100 | + |
| 101 | + # --- 6) return shape (B, H_out) --- |
44 | 102 | return y[0].T |
45 | | - return inner_f |
| 103 | + |
| 104 | + return batched_conv1d |
46 | 105 |
|
47 | 106 | @mlx_funcify.register(Blockwise) |
48 | 107 | def funcify_Blockwise(op: Blockwise, node, **kwargs): |
49 | 108 | if isinstance(op.core_op, Conv1d): |
50 | 109 | return blockwise_conv1d(op, node, **kwargs) |
51 | 110 |
|
52 | | - core_f = mlx_funcify(op.core_op) |
| 111 | + core_f = mlx_funcify(op.core_op, node) |
53 | 112 |
|
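| | + # blockwise_f is a thin fallback: it assumes core_f already broadcasts |
| | + # over the Blockwise batch dimensions. A hypothetical stricter variant |
| | + # could vectorize explicitly, e.g. mx.vmap(core_f, in_axes=0, out_axes=0). |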
54 | 113 | def blockwise_f(*inputs): |
55 | | - return blockwise_f(*inputs) |
| 114 | + # calling blockwise_f here recursed forever; call the core function |
| 115 | + return core_f(*inputs) |
| 116 | + |
| 117 | + return blockwise_f |
|