diff --git a/pytensor/tensor/basic.py b/pytensor/tensor/basic.py
index e789659474..b9bf4df4a5 100644
--- a/pytensor/tensor/basic.py
+++ b/pytensor/tensor/basic.py
@@ -921,7 +921,7 @@ def zeros_like(model, dtype=None, opt=False):
     return fill(_model, ret)
 
 
-def zeros(shape, dtype=None):
+def zeros(shape, dtype=None) -> TensorVariable:
     """Create a `TensorVariable` filled with zeros, closer to NumPy's syntax than ``alloc``."""
     if not (
         isinstance(shape, np.ndarray | Sequence)
@@ -933,7 +933,7 @@ def zeros(shape, dtype=None):
     return alloc(np.array(0, dtype=dtype), *shape)
 
 
-def ones(shape, dtype=None):
+def ones(shape, dtype=None) -> TensorVariable:
     """Create a `TensorVariable` filled with ones, closer to NumPy's syntax than ``alloc``."""
     if not (
         isinstance(shape, np.ndarray | Sequence)
diff --git a/pytensor/tensor/signal/conv.py b/pytensor/tensor/signal/conv.py
index 9eb9a8abf7..4621a08b1c 100644
--- a/pytensor/tensor/signal/conv.py
+++ b/pytensor/tensor/signal/conv.py
@@ -1,17 +1,22 @@
-from typing import TYPE_CHECKING, Literal, cast
+from typing import TYPE_CHECKING, Literal
+from typing import cast as type_cast
 
 import numpy as np
 from numpy import convolve as numpy_convolve
+from scipy.signal import convolve2d as scipy_convolve2d
 
 from pytensor.gradient import DisconnectedType
 from pytensor.graph import Apply, Constant
+from pytensor.graph.op import Op
 from pytensor.link.c.op import COp
 from pytensor.scalar import as_scalar
 from pytensor.scalar.basic import upcast
 from pytensor.tensor.basic import as_tensor_variable, join, zeros
 from pytensor.tensor.blockwise import Blockwise
 from pytensor.tensor.math import maximum, minimum, switch
-from pytensor.tensor.type import vector
+from pytensor.tensor.pad import pad
+from pytensor.tensor.subtensor import flip
+from pytensor.tensor.type import tensor
 from pytensor.tensor.variable import TensorVariable
@@ -19,54 +24,69 @@ from pytensor.tensor import TensorLike
 
 
-class Convolve1d(COp):
+class AbstractConvolveNd:
     __props__ = ()
-    gufunc_signature = "(n),(k),()->(o)"
+    ndim: int
+
+    @property
+    def gufunc_signature(self):
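+        # e.g. ndim=1 gives "(n0),(k0),()->(o0)",
+        # and ndim=2 gives "(n0,n1),(k0,k1),()->(o0,o1)"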
+        data_signature = ",".join([f"n{i}" for i in range(self.ndim)])
+        kernel_signature = ",".join([f"k{i}" for i in range(self.ndim)])
+        output_signature = ",".join([f"o{i}" for i in range(self.ndim)])
+        return f"({data_signature}),({kernel_signature}),()->({output_signature})"
 
     def make_node(self, in1, in2, full_mode):
         in1 = as_tensor_variable(in1)
         in2 = as_tensor_variable(in2)
         full_mode = as_scalar(full_mode)
 
-        if not (in1.ndim == 1 and in2.ndim == 1):
-            raise ValueError("Convolution inputs must be vector (ndim=1)")
+        ndim = self.ndim
+        if not (in1.ndim == ndim and in2.ndim == ndim):
+            raise ValueError(
+                f"Convolution inputs must have ndim={ndim}, got: in1={in1.ndim}, in2={in2.ndim}"
+            )
         if not full_mode.dtype == "bool":
-            raise ValueError("Convolution mode must be a boolean type")
+            raise ValueError("Convolution full_mode flag must be a boolean type")
 
-        dtype = upcast(in1.dtype, in2.dtype)
-        n = in1.type.shape[0]
-        k = in2.type.shape[0]
         match full_mode:
             case Constant():
                 static_mode = "full" if full_mode.data else "valid"
             case _:
                 static_mode = None
 
-        if n is None or k is None or static_mode is None:
-            out_shape = (None,)
-        elif static_mode == "full":
-            out_shape = (n + k - 1,)
-        else:  # mode == "valid":
-            out_shape = (max(n, k) - min(n, k) + 1,)
+        if static_mode is None:
+            out_shape = (None,) * ndim
+        else:
+            out_shape = []
+            # TODO: Raise if static shapes are not valid (one input's size must dominate the other's in every dimension)
+            for n, k in zip(in1.type.shape, in2.type.shape):
+                if n is None or k is None:
+                    out_shape.append(None)
+                elif static_mode == "full":
+                    out_shape.append(n + k - 1)
+                else:  # static_mode == "valid"
+                    out_shape.append(max(n, k) - min(n, k) + 1)
+            out_shape = tuple(out_shape)
 
-        out = vector(dtype=dtype, shape=out_shape)
-        return Apply(self, [in1, in2, full_mode], [out])
+        dtype = upcast(in1.dtype, in2.dtype)
 
-    def perform(self, node, inputs, outputs):
-        # We use numpy_convolve as that's what scipy would use if method="direct" was passed.
-        # And mode != "same", which this Op doesn't cover anyway.
-        in1, in2, full_mode = inputs
-        outputs[0][0] = numpy_convolve(in1, in2, mode="full" if full_mode else "valid")
+        out = tensor(dtype=dtype, shape=out_shape)
+        return Apply(self, [in1, in2, full_mode], [out])
 
     def infer_shape(self, fgraph, node, shapes):
         _, _, full_mode = node.inputs
         in1_shape, in2_shape, _ = shapes
-        n = in1_shape[0]
-        k = in2_shape[0]
-        shape_valid = maximum(n, k) - minimum(n, k) + 1
-        shape_full = n + k - 1
-        shape = switch(full_mode, shape_full, shape_valid)
-        return [[shape]]
+        out_shape = [
+            switch(full_mode, n + k - 1, maximum(n, k) - minimum(n, k) + 1)
+            for n, k in zip(in1_shape, in2_shape)
+        ]
+
+        return [out_shape]
 
     def connection_pattern(self, node):
         return [[True], [True], [False]]
@@ -75,22 +95,34 @@ def L_op(self, inputs, outputs, output_grads):
         in1, in2, full_mode = inputs
         [grad] = output_grads
 
-        n = in1.shape[0]
-        k = in2.shape[0]
+        n = in1.shape
+        k = in2.shape
 
+        # Note: this assumes the shape of one input dominates the other over all dimensions (which is required for a valid forward)
         # If mode is "full", or mode is "valid" and k >= n, then in1_bar mode should use "valid" convolve
-        # The expression below is equivalent to ~(full_mode | (k >= n))
-        full_mode_in1_bar = ~full_mode & (k < n)
+        # The expression below is equivalent to ~(full_mode | (k >= n).all())
+        full_mode_in1_bar = ~full_mode & (k < n).any()
         # If mode is "full", or mode is "valid" and n >= k, then in2_bar mode should use "valid" convolve
-        # The expression below is equivalent to ~(full_mode | (n >= k))
-        full_mode_in2_bar = ~full_mode & (n < k)
+        # The expression below is equivalent to ~(full_mode | (n >= k).all())
+        full_mode_in2_bar = ~full_mode & (n < k).any()
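+        # E.g. a "valid" forward with in1 of shape (5, 5) and in2 of shape (3, 3):
+        # in1_bar needs a "full" convolution of the output gradient with the
+        # flipped kernel ((k < n).any() is True), while in2_bar needs a "valid" one.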
 
         return [
-            self(grad, in2[::-1], full_mode_in1_bar),
-            self(grad, in1[::-1], full_mode_in2_bar),
+            self(grad, flip(in2), full_mode_in1_bar),
+            self(grad, flip(in1), full_mode_in2_bar),
             DisconnectedType()(),
         ]
 
+
+class Convolve1d(AbstractConvolveNd, COp):
+    __props__ = ()
+    ndim = 1
+
+    def perform(self, node, inputs, outputs):
+        # We use numpy_convolve as that's what scipy would use if method="direct" was passed
+        # and mode != "same", which this Op doesn't cover anyway.
+        in1, in2, full_mode = inputs
+        outputs[0][0] = numpy_convolve(in1, in2, mode="full" if full_mode else "valid")
+
     def c_code_cache_version(self):
         return (2,)
@@ -210,4 +242,91 @@ def convolve1d(
         mode = "valid"
 
     full_mode = as_scalar(np.bool_(mode == "full"))
-    return cast(TensorVariable, blockwise_convolve_1d(in1, in2, full_mode))
+    return type_cast(TensorVariable, blockwise_convolve_1d(in1, in2, full_mode))
+
+
+class Convolve2d(AbstractConvolveNd, Op):
+    __props__ = ()
+    ndim = 2
+
+    def perform(self, node, inputs, outputs):
+        in1, in2, full_mode = inputs
+        # TODO: For float/complex inputs we could dispatch to
+        #  scipy.signal.convolve with method="fft", which is faster for large inputs.
+        # full_mode is received as a NumPy boolean scalar; .item() unwraps it to a
+        # plain Python bool before it is used to pick the mode string.
+        outputs[0][0] = scipy_convolve2d(
+            in1,
+            in2,
+            mode="full" if full_mode.item() else "valid",
+        )
+
+
+blockwise_convolve_2d = Blockwise(Convolve2d())
+
+
+def convolve2d(
+    in1: "TensorLike",
+    in2: "TensorLike",
+    mode: Literal["full", "valid", "same"] = "full",
+    boundary: Literal["fill", "wrap", "symm"] = "fill",
+    fillvalue: float | int = 0,
+) -> TensorVariable:
+    """Convolve two two-dimensional arrays.
+
+    Convolve `in1` and `in2`, with the output size determined by the `mode` argument.
+
+    Parameters
+    ----------
+    in1 : (..., N, M) tensor_like
+        First input.
+    in2 : (..., K, L) tensor_like
+        Second input.
+    mode : {'full', 'valid', 'same'}, optional
+        A string indicating the size of the output:
+
+        - 'full': The output is the full discrete linear convolution of the inputs, with shape (..., N+K-1, M+L-1).
+        - 'valid': The output consists only of elements that do not rely on zero-padding, with shape (..., max(N, K) - min(N, K) + 1, max(M, L) - min(M, L) + 1).
+        - 'same': The output is the same size as `in1`, centered with respect to the 'full' output. Not yet implemented.
+    boundary : {'fill', 'wrap', 'symm'}, optional
+        A string indicating how to handle boundaries:
+
+        - 'fill': Pads the input arrays with `fillvalue`.
+        - 'wrap': Circularly wraps the input arrays.
+        - 'symm': Symmetrically reflects the input arrays.
+    fillvalue : float or int, optional
+        The value to use for padding when `boundary` is 'fill'. Default is 0.
+
+    Returns
+    -------
+    out : tensor_variable
+        The discrete linear convolution of `in1` with `in2`.
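+
+    Examples
+    --------
+    A small usage example (shapes chosen arbitrarily for illustration):
+
+    >>> import numpy as np
+    >>> from pytensor import function
+    >>> from pytensor.tensor import matrix
+    >>> from pytensor.tensor.signal.conv import convolve2d
+    >>> x, k = matrix("x"), matrix("k")
+    >>> fn = function([x, k], convolve2d(x, k, mode="valid"))
+    >>> x_val = np.ones((5, 5), dtype=x.dtype)
+    >>> k_val = np.ones((3, 3), dtype=k.dtype)
+    >>> fn(x_val, k_val).shape
+    (3, 3)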
+
+    """
+    in1 = as_tensor_variable(in1)
+    in2 = as_tensor_variable(in2)
+
+    if mode == "same":
+        raise NotImplementedError("same mode not implemented for convolve2d")
+
+    if mode != "valid" and (boundary != "fill" or fillvalue != 0):
+        # Handle non-trivial boundaries with a "valid" convolution on an appropriately padded input
+        *_, k, l = in2.shape
+        ndim = max(in1.type.ndim, in2.type.ndim)
+
+        pad_width = zeros((ndim, 2), dtype="int64")
+        pad_width = pad_width[-2, :].set(k - 1)
+        pad_width = pad_width[-1, :].set(l - 1)
+        if boundary == "fill":
+            in1 = pad(
+                in1, pad_width=pad_width, mode="constant", constant_values=fillvalue
+            )
+        elif boundary == "wrap":
+            in1 = pad(in1, pad_width=pad_width, mode="wrap")
+        elif boundary == "symm":
+            in1 = pad(in1, pad_width=pad_width, mode="symmetric")
+
+        mode = "valid"
+
+    full_mode = as_scalar(np.bool_(mode == "full"))
+    return type_cast(TensorVariable, blockwise_convolve_2d(in1, in2, full_mode))
diff --git a/tests/tensor/signal/test_conv.py b/tests/tensor/signal/test_conv.py
index 4df25cc1ca..90751d4673 100644
--- a/tests/tensor/signal/test_conv.py
+++ b/tests/tensor/signal/test_conv.py
@@ -3,13 +3,15 @@
 import numpy as np
 import pytest
 from scipy.signal import convolve as scipy_convolve
+from scipy.signal import convolve2d as scipy_convolve2d
 
 from pytensor import config, function, grad
 from pytensor.graph.rewriting import rewrite_graph
 from pytensor.graph.traversal import ancestors, io_toposort
 from pytensor.tensor import matrix, tensor, vector
+from pytensor.tensor.basic import expand_dims
 from pytensor.tensor.blockwise import Blockwise
-from pytensor.tensor.signal.conv import Convolve1d, convolve1d
+from pytensor.tensor.signal.conv import Convolve1d, convolve1d, convolve2d
 from tests import unittest_tools as utt
 
 
@@ -46,7 +48,7 @@ def test_convolve1d_batch():
     res = out.eval({x: x_test, y: y_test})
     # Second entry of x, y are just y, x respectively,
    # so res[0] and res[1] should be identical.
-    rtol = 1e-6 if config.floatX == "float32" else 1e-15
+    rtol = 1e-6 if config.floatX == "float32" else 1e-12
     res_np = np.convolve(x_test[0], y_test[0])
     np.testing.assert_allclose(res[0], res_np, rtol=rtol)
     np.testing.assert_allclose(res[1], res_np, rtol=rtol)
@@ -137,3 +139,63 @@ def convolve1d_grad_benchmarker(convolve_mode, mode, benchmark):
 
 @pytest.mark.parametrize("convolve_mode", ["full", "valid"])
 def test_convolve1d_grad_benchmark_c(convolve_mode, benchmark):
     convolve1d_grad_benchmarker(convolve_mode, "FAST_RUN", benchmark)
+
+
+@pytest.mark.parametrize(
+    "kernel_shape", [(3, 3), (5, 3), (5, 8)], ids=lambda x: f"kernel_shape={x}"
+)
+@pytest.mark.parametrize(
+    "data_shape", [(3, 3), (5, 5), (8, 8)], ids=lambda x: f"data_shape={x}"
+)
+@pytest.mark.parametrize("mode", ["full", "valid"])  # "same" is not implemented yet
+@pytest.mark.parametrize(
+    "boundary, boundary_kwargs",
+    [
+        ("fill", {"fillvalue": 0}),
+        ("fill", {"fillvalue": 0.5}),
+        ("wrap", {}),
+        ("symm", {}),
+    ],
+)
+def test_convolve2d(kernel_shape, data_shape, mode, boundary, boundary_kwargs):
+    data = matrix("data")
+    kernel = matrix("kernel")
+    op = partial(convolve2d, mode=mode, boundary=boundary, **boundary_kwargs)
+    conv_result = op(data, kernel)
+
+    fn = function([data, kernel], conv_result)
+
+    rng = np.random.default_rng((26, kernel_shape, data_shape, sum(map(ord, mode))))
+    data_val = rng.normal(size=data_shape).astype(data.dtype)
+    kernel_val = rng.normal(size=kernel_shape).astype(kernel.dtype)
+
+    np.testing.assert_allclose(
+        fn(data_val, kernel_val),
+        scipy_convolve2d(
+            data_val, kernel_val, mode=mode, boundary=boundary, **boundary_kwargs
+        ),
+        atol=1e-6 if config.floatX == "float32" else 1e-8,
+    )
+
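+    # verify_grad numerically checks the symbolic gradient (L_op) with respect to the kernel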
+    utt.verify_grad(lambda k: op(data_val, k).sum(), [kernel_val])
+
+
+def test_batched_1d_agrees_with_2d_row_kernel():
+    data = matrix("data")
+    kernel_1d = vector("kernel_1d")
+    kernel_2d = expand_dims(kernel_1d, 0)
+
+    output_1d = convolve1d(data, kernel_1d, mode="valid")
+    output_2d = convolve2d(data, kernel_2d, mode="valid")
+
+    grad_1d = grad(output_1d.sum(), kernel_1d).ravel()
+    grad_2d = grad(output_2d.sum(), kernel_1d).ravel()
+
+    fn = function([data, kernel_1d], [output_1d, output_2d, grad_1d, grad_2d])
+
+    data_val = np.random.normal(size=(10, 8)).astype(config.floatX)
+    kernel_1d_val = np.random.normal(size=(3,)).astype(config.floatX)
+
+    forward_1d, forward_2d, backward_1d, backward_2d = fn(data_val, kernel_1d_val)
+    np.testing.assert_allclose(forward_1d, forward_2d)
+    np.testing.assert_allclose(backward_1d, backward_2d)