From 1db47cd697a7e227aa6cda0234694cacd7b752a7 Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 23 Nov 2025 19:45:39 -0500
Subject: [PATCH] fix: add tensormatmul and tensortranspose to iengine for
 proper integration

Added missing IEngine methods for tensor-level matrix operations to enable
GPU acceleration in autodiff computation graphs. TensorOperations currently
uses Tensor.MatrixMultiply and Tensor.Transpose which don't leverage IEngine,
but this infrastructure enables future optimization.

Changes:
- Add TensorMatMul and TensorTranspose methods to IEngine interface
- Implement both methods in CpuEngine with standard matrix algorithms
- Implement both methods in GpuEngine delegating to Matrix operations
- Add helper methods ToMatrix/ToTensor for 2D tensor-matrix conversion

This completes the IEngine interface for tensor operations required by
autodiff, even though TensorOperations doesn't use them yet due to
ComputationNode lacking an Engine reference.

Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/Engines/CpuEngine.cs | 70 ++++++++++++++++++++++++++++
 src/Engines/GpuEngine.cs | 98 ++++++++++++++++++++++++++++++++++++++++
 src/Engines/IEngine.cs   | 39 ++++++++++++++++
 3 files changed, 207 insertions(+)
diff --git a/src/Engines/CpuEngine.cs b/src/Engines/CpuEngine.cs
index fc0745a0e..111eb7500 100644
--- a/src/Engines/CpuEngine.cs
+++ b/src/Engines/CpuEngine.cs
@@ -729,6 +729,76 @@ public Tensor<T> BatchMatMul<T>(Tensor<T> a, Tensor<T> b)
         return result;
     }
 
+    /// <inheritdoc/>
+    public Tensor<T> TensorMatMul<T>(Tensor<T> a, Tensor<T> b)
+    {
+        // Reject null operands before touching any of their members.
+        if (a == null) throw new ArgumentNullException(nameof(a));
+        if (b == null) throw new ArgumentNullException(nameof(b));
+
+        if (!(a.Rank == 2 && b.Rank == 2))
+        {
+            throw new ArgumentException(
+                $"TensorMatMul requires 2D tensors. Got ranks {a.Rank} and {b.Rank}.");
+        }
+
+        int rowsA = a.Shape[0], innerA = a.Shape[1];
+        int innerB = b.Shape[0], colsB = b.Shape[1];
+
+        if (innerA != innerB)
+        {
+            throw new ArgumentException(
+                $"Matrix dimensions incompatible for multiplication. " +
+                $"First tensor has shape [{rowsA}, {innerA}], second has shape [{innerB}, {colsB}]. " +
+                $"Inner dimensions must match ({innerA} != {innerB}).");
+        }
+
+        var ops = MathHelper.GetNumericOperations<T>();
+        var product = new Tensor<T>(new[] { rowsA, colsB });
+
+        // C = A @ B: each output cell is the dot product of one row of A with one column of B.
+        for (int row = 0; row < rowsA; row++)
+        {
+            for (int col = 0; col < colsB; col++)
+            {
+                T dot = ops.Zero;
+                for (int idx = 0; idx < innerA; idx++)
+                {
+                    dot = ops.Add(dot, ops.Multiply(a[row, idx], b[idx, col]));
+                }
+                product[row, col] = dot;
+            }
+        }
+
+        return product;
+    }
+
+    /// <inheritdoc/>
+    public Tensor<T> TensorTranspose<T>(Tensor<T> tensor)
+    {
+        if (tensor == null) throw new ArgumentNullException(nameof(tensor));
+        if (tensor.Rank != 2)
+        {
+            throw new ArgumentException(
+                $"TensorTranspose requires a 2D tensor. Got rank {tensor.Rank}.");
+        }
+
+        int sourceRows = tensor.Shape[0], sourceCols = tensor.Shape[1];
+
+        // The output shape is the input shape with its two dimensions swapped.
+        var transposed = new Tensor<T>(new[] { sourceCols, sourceRows });
+        for (int r = 0; r < sourceRows; r++)
+        {
+            // Row r of the input becomes column r of the output.
+            for (int c = 0; c < sourceCols; c++)
+            {
+                transposed[c, r] = tensor[r, c];
+            }
+        }
+
+        return transposed;
+    }
+
     /// <inheritdoc/>
     public Tensor<T> TensorAdd<T>(Tensor<T> a, Tensor<T> b)
     {
diff --git a/src/Engines/GpuEngine.cs b/src/Engines/GpuEngine.cs
index 5def66643..11c659dce 100644
--- a/src/Engines/GpuEngine.cs
+++ b/src/Engines/GpuEngine.cs
@@ -3965,6 +3965,104 @@ private Tensor<double> BatchMatMulGpuDouble(Tensor<double> a, Tensor<double> b)
         }
     }
 
+    /// <inheritdoc/>
+    public Tensor<T> TensorMatMul<T>(Tensor<T> a, Tensor<T> b)
+    {
+        // Validate before indexing Shape so null or non-2D input raises the same exceptions as CpuEngine.
+        if (a == null) throw new ArgumentNullException(nameof(a));
+        if (b == null) throw new ArgumentNullException(nameof(b));
+        if (a.Rank != 2 || b.Rank != 2)
+            throw new ArgumentException(
+                $"TensorMatMul requires 2D tensors. Got ranks {a.Rank} and {b.Rank}.");
+
+        // Adaptive execution (Phase B: US-GPU-004): below the matrix-multiply threshold, stay on the CPU.
+        if (Math.Max(a.Shape[0], a.Shape[1]) < _thresholds.MatrixMultiply)
+        {
+            return _cpuFallback.TensorMatMul(a, b);
+        }
+
+        // GPU path (Phase B: US-GPU-006): float/double round-trip through Matrix<T> and MatrixMultiply.
+        if (SupportsGpu && _gpuHealthy)
+        {
+            if (typeof(T) == typeof(float))
+            {
+                var product = MatrixMultiply(ToMatrix((Tensor<float>)(object)a), ToMatrix((Tensor<float>)(object)b));
+                return (Tensor<T>)(object)ToTensor(product);
+            }
+            if (typeof(T) == typeof(double))
+            {
+                var product = MatrixMultiply(ToMatrix((Tensor<double>)(object)a), ToMatrix((Tensor<double>)(object)b));
+                return (Tensor<T>)(object)ToTensor(product);
+            }
+        }
+
+        // Fallback to CPU for unsupported types or unhealthy GPU
+        return _cpuFallback.TensorMatMul(a, b);
+    }
+
+    /// <inheritdoc/>
+    public Tensor<T> TensorTranspose<T>(Tensor<T> tensor)
+    {
+        // Validate before indexing Shape so null or non-2D input raises the same exceptions as CpuEngine.
+        if (tensor == null) throw new ArgumentNullException(nameof(tensor));
+        if (tensor.Rank != 2)
+            throw new ArgumentException($"TensorTranspose requires a 2D tensor. Got rank {tensor.Rank}.");
+
+        // Adaptive execution (Phase B: US-GPU-004); the MatrixMultiply threshold is used as a proxy.
+        if (Math.Max(tensor.Shape[0], tensor.Shape[1]) < _thresholds.MatrixMultiply)
+        {
+            return _cpuFallback.TensorTranspose(tensor);
+        }
+
+        // GPU path (Phase B: US-GPU-006): float/double round-trip through Matrix<T> and MatrixTranspose.
+        if (SupportsGpu && _gpuHealthy)
+        {
+            if (typeof(T) == typeof(float))
+            {
+                var matrix = ToMatrix((Tensor<float>)(object)tensor);
+                return (Tensor<T>)(object)ToTensor(MatrixTranspose(matrix));
+            }
+            if (typeof(T) == typeof(double))
+            {
+                var matrix = ToMatrix((Tensor<double>)(object)tensor);
+                return (Tensor<T>)(object)ToTensor(MatrixTranspose(matrix));
+            }
+        }
+
+        // Fallback to CPU for unsupported types or unhealthy GPU
+        return _cpuFallback.TensorTranspose(tensor);
+    }
+
+    // 2D Tensor <-> Matrix conversion helpers; each copies element by element into a new object.
+    private static Matrix<T> ToMatrix<T>(Tensor<T> tensor)
+    {
+        if (tensor.Rank != 2)
+            throw new ArgumentException("Tensor must be 2D to convert to Matrix");
+
+        var copy = new Matrix<T>(tensor.Shape[0], tensor.Shape[1]);
+        for (int r = 0, rowCount = tensor.Shape[0]; r < rowCount; r++)
+        {
+            for (int c = 0, colCount = tensor.Shape[1]; c < colCount; c++)
+            {
+                copy[r, c] = tensor[r, c];
+            }
+        }
+        return copy;
+    }
+
+    private static Tensor<T> ToTensor<T>(Matrix<T> matrix)
+    {
+        var copy = new Tensor<T>(new[] { matrix.Rows, matrix.Columns }); // shape is [Rows, Columns]
+        for (int r = 0, rowCount = matrix.Rows; r < rowCount; r++)
+        {
+            for (int c = 0, colCount = matrix.Columns; c < colCount; c++)
+            {
+                copy[r, c] = matrix[r, c];
+            }
+        }
+        return copy;
+    }
+
     /// <inheritdoc/>
     public Tensor<T> TensorAdd<T>(Tensor<T> a, Tensor<T> b)
     {
diff --git a/src/Engines/IEngine.cs b/src/Engines/IEngine.cs
index b67cc69b2..4504c8c8b 100644
--- a/src/Engines/IEngine.cs
+++ b/src/Engines/IEngine.cs
@@ -691,6 +691,45 @@ public interface IEngine
     /// </remarks>
     Tensor<T> BatchMatMul<T>(Tensor<T> a, Tensor<T> b);
 
+    /// <summary>
+    /// Performs matrix multiplication on two 2D tensors.
+    /// </summary>
+    /// <typeparam name="T">The numeric type of tensor elements.</typeparam>
+    /// <param name="a">The first tensor (M x K) - must be 2D.</param>
+    /// <param name="b">The second tensor (K x N) - must be 2D.</param>
+    /// <returns>The product tensor (M x N).</returns>
+    /// <exception cref="ArgumentException">Thrown when tensors are not 2D or dimensions are incompatible.</exception>
+    /// <remarks>
+    /// <para>
+    /// Matrix multiplication for 2D tensors. This is the tensor equivalent of MatrixMultiply.
+    /// Intended for autodiff computation graphs where operations work with Tensor types.
+    /// </para>
+    /// <para>
+    /// For 2D tensors: result[i,j] = sum(a[i,k] * b[k,j]) for all k.
+    /// Implementations may run large inputs on the GPU and small ones on the CPU.
+    /// </para>
+    /// </remarks>
+    Tensor<T> TensorMatMul<T>(Tensor<T> a, Tensor<T> b);
+
+    /// <summary>
+    /// Transposes a 2D tensor (swaps rows and columns).
+    /// </summary>
+    /// <typeparam name="T">The numeric type of tensor elements.</typeparam>
+    /// <param name="tensor">The input tensor (M x N) - must be 2D.</param>
+    /// <returns>The transposed tensor (N x M).</returns>
+    /// <exception cref="ArgumentException">Thrown when tensor is not 2D.</exception>
+    /// <remarks>
+    /// <para>
+    /// Transpose operation for 2D tensors. This is the tensor equivalent of MatrixTranspose.
+    /// Intended for autodiff computation graphs where operations work with Tensor types.
+    /// </para>
+    /// <para>
+    /// For a 2D tensor, swaps dimensions: if input has shape [M, N], result has shape [N, M],
+    /// with result[j,i] = tensor[i,j]. Implementations may run large inputs on the GPU.
+    /// </para>
+    /// </remarks>
+    Tensor<T> TensorTranspose<T>(Tensor<T> tensor);
+
     /// <summary>
     /// Adds two tensors element-wise.
     /// </summary>