
Commit f0bccc9

franklinic and claude committed
fix: add flat indexing methods to TensorBase and fix tensor access patterns
Add GetFlat/SetFlat methods to TensorBase for accessing tensor data by linear index, fixing test failures in gradient correctness tests that were using single-integer indexing on multi-dimensional tensors.

- Add GetFlat(int flatIndex) and SetFlat(int flatIndex, T value) to TensorBase
- Fix GumbelSoftmax, TaylorSoftmax, and Pad methods in TensorOperations.cs
- Update GradientCorrectnessTests to use flat indexing helper methods

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent c80e1d3 commit f0bccc9
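For context, a minimal usage sketch of the new helpers (hypothetical shape and values, not taken from this commit; it assumes, as in the hunks below, that `Tensor<T>` accepts an `int[]` shape):

```csharp
// A 2x3 tensor stored row-major: flat index k maps to (row, col) = (k / 3, k % 3).
var t = new Tensor<double>(new[] { 2, 3 });

// The failing pattern was single-integer indexing (t[i]) on a multi-dimensional
// tensor; GetFlat/SetFlat make the linear access explicit and bounds-checked.
for (int k = 0; k < t.Length; k++)
    t.SetFlat(k, k * 10.0);

double v = t.GetFlat(4); // element (1, 1) -> 40.0
```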

File tree

3 files changed: +161 -97 lines changed

src/AiDotNet.Tensors/LinearAlgebra/TensorBase.cs

Lines changed: 34 additions & 0 deletions
```diff
@@ -259,6 +259,40 @@ internal Span<T> AsWritableSpan()
         return _data.AsWritableSpan();
     }
 
+    /// <summary>
+    /// Gets the value at a flat (linear) index in the underlying data.
+    /// </summary>
+    /// <param name="flatIndex">The flat index (0 to Length-1).</param>
+    /// <returns>The value at the specified flat index.</returns>
+    /// <remarks>
+    /// <para><b>For Beginners:</b> This allows accessing tensor elements using a single
+    /// index that treats the tensor as a 1D array. The flat index corresponds to
+    /// row-major ordering where the last dimension varies fastest.</para>
+    /// </remarks>
+    public T GetFlat(int flatIndex)
+    {
+        if (flatIndex < 0 || flatIndex >= Length)
+            throw new ArgumentOutOfRangeException(nameof(flatIndex), "Flat index is out of range.");
+        return _data[flatIndex];
+    }
+
+    /// <summary>
+    /// Sets the value at a flat (linear) index in the underlying data.
+    /// </summary>
+    /// <param name="flatIndex">The flat index (0 to Length-1).</param>
+    /// <param name="value">The value to set.</param>
+    /// <remarks>
+    /// <para><b>For Beginners:</b> This allows setting tensor elements using a single
+    /// index that treats the tensor as a 1D array. The flat index corresponds to
+    /// row-major ordering where the last dimension varies fastest.</para>
+    /// </remarks>
+    public void SetFlat(int flatIndex, T value)
+    {
+        if (flatIndex < 0 || flatIndex >= Length)
+            throw new ArgumentOutOfRangeException(nameof(flatIndex), "Flat index is out of range.");
+        _data[flatIndex] = value;
+    }
+
     /// <summary>
     /// Returns a string representation of the tensor.
     /// </summary>
```
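The row-major convention described in the doc comments means the flat index is computed with the last dimension varying fastest. A small sketch of the mapping (the helper name is illustrative, not part of the commit):

```csharp
// Row-major flat index for a rank-3 tensor with dims (d0, d1, d2):
// flat = (i0 * d1 + i1) * d2 + i2 -- the last index varies fastest.
static int Flatten(int i0, int i1, int i2, int d1, int d2)
    => (i0 * d1 + i1) * d2 + i2;

// For shape (2, 3, 4), element (1, 2, 3) lands at (1 * 3 + 2) * 4 + 3 = 23,
// which is the last valid flat index (Length - 1 = 2 * 3 * 4 - 1).
```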

src/Autodiff/TensorOperations.cs

Lines changed: 19 additions & 19 deletions
```diff
@@ -2491,7 +2491,7 @@ public static ComputationNode<T> Pad(ComputationNode<T> a, int[,] padWidth, T? v
         // Initialize with pad value
         for (int i = 0; i < result.Length; i++)
         {
-            result[i] = padValue;
+            result.SetFlat(i, padValue);
         }
         // Copy input data to center
         for (int r = 0; r < inputRows; r++)
@@ -2546,7 +2546,7 @@ void BackwardFunction(Tensor<T> gradient)
         // Initialize with pad value
         for (int i = 0; i < result.Length; i++)
         {
-            result[i] = padValue;
+            result.SetFlat(i, padValue);
         }
 
         // Copy input data to appropriate location
```
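For context, a standalone plain-double sketch of what the surrounding Pad forward pass does, assuming `padWidth[d, 0]` and `padWidth[d, 1]` hold the leading and trailing pad counts per dimension (an interpretation, not confirmed by the excerpt):

```csharp
// Constant-pad a 2-D array: fill with padValue, then copy the input inside.
static double[,] Pad2D(double[,] input, int[,] padWidth, double padValue)
{
    int inRows = input.GetLength(0), inCols = input.GetLength(1);
    int outRows = inRows + padWidth[0, 0] + padWidth[0, 1];
    int outCols = inCols + padWidth[1, 0] + padWidth[1, 1];

    var result = new double[outRows, outCols];
    // Initialize everything with the pad value (the loop the hunks rewrite with SetFlat).
    for (int r = 0; r < outRows; r++)
        for (int c = 0; c < outCols; c++)
            result[r, c] = padValue;

    // Copy the input into the interior region, offset by the leading pads.
    for (int r = 0; r < inRows; r++)
        for (int c = 0; c < inCols; c++)
            result[r + padWidth[0, 0], c + padWidth[1, 0]] = input[r, c];

    return result;
}
```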
```diff
@@ -7515,15 +7515,15 @@ public static ComputationNode<T> GumbelSoftmax(ComputationNode<T> logits, double
             var u = random.NextDouble();
             u = Math.Max(u, eps);
             u = Math.Min(u, 1 - eps);
-            gumbel[i] = numOps.FromDouble(-Math.Log(-Math.Log(u)));
+            gumbel.SetFlat(i, numOps.FromDouble(-Math.Log(-Math.Log(u))));
         }
 
         // Compute soft samples: softmax((logits + gumbel) / temperature)
         var tempTensor = new Tensor<T>(shape);
         for (int i = 0; i < tempTensor.Length; i++)
         {
-            var val = numOps.Add(logits.Value[i], gumbel[i]);
-            tempTensor[i] = numOps.Divide(val, numOps.FromDouble(temperature));
+            var val = numOps.Add(logits.Value.GetFlat(i), gumbel.GetFlat(i));
+            tempTensor.SetFlat(i, numOps.Divide(val, numOps.FromDouble(temperature)));
         }
 
         // Apply softmax along last axis
@@ -7541,18 +7541,18 @@ public static ComputationNode<T> GumbelSoftmax(ComputationNode<T> logits, double
         for (int b = 0; b < batchSize; b++)
         {
             int maxIdx = 0;
-            T maxVal = softResult[b * lastDim];
+            T maxVal = softResult.GetFlat(b * lastDim);
             for (int i = 1; i < lastDim; i++)
             {
-                if (numOps.GreaterThan(softResult[b * lastDim + i], maxVal))
+                if (numOps.GreaterThan(softResult.GetFlat(b * lastDim + i), maxVal))
                 {
-                    maxVal = softResult[b * lastDim + i];
+                    maxVal = softResult.GetFlat(b * lastDim + i);
                     maxIdx = i;
                 }
             }
             for (int i = 0; i < lastDim; i++)
             {
-                hardResult[b * lastDim + i] = i == maxIdx ? numOps.One : numOps.Zero;
+                hardResult.SetFlat(b * lastDim + i, i == maxIdx ? numOps.One : numOps.Zero);
             }
         }
 
```
@@ -9112,7 +9112,7 @@ public static ComputationNode<T> TaylorSoftmax(ComputationNode<T> a, int order =
91129112
for (int i = 0; i < axisSize; i++)
91139113
{
91149114
int flatIdx = outer * axisSize * innerSize + i * innerSize + inner;
9115-
var x = a.Value[flatIdx];
9115+
var x = a.Value.GetFlat(flatIdx);
91169116
var taylorExp = numOps.One; // Start with 1
91179117
var xPower = numOps.One;
91189118

@@ -9128,15 +9128,15 @@ public static ComputationNode<T> TaylorSoftmax(ComputationNode<T> a, int order =
91289128
? taylorExp
91299129
: numOps.FromDouble(1e-10);
91309130

9131-
taylorExpValues[flatIdx] = taylorExp;
9131+
taylorExpValues.SetFlat(flatIdx, taylorExp);
91329132
expSum = numOps.Add(expSum, taylorExp);
91339133
}
91349134

91359135
// Normalize
91369136
for (int i = 0; i < axisSize; i++)
91379137
{
91389138
int flatIdx = outer * axisSize * innerSize + i * innerSize + inner;
9139-
result[flatIdx] = numOps.Divide(taylorExpValues[flatIdx], expSum);
9139+
result.SetFlat(flatIdx, numOps.Divide(taylorExpValues.GetFlat(flatIdx), expSum));
91409140
}
91419141
}
91429142
}
@@ -9163,7 +9163,7 @@ void BackwardFunction(Tensor<T> gradient)
91639163
for (int i = 0; i < capturedAxisSize; i++)
91649164
{
91659165
int flatIdx = outer * capturedAxisSize * capturedInnerSize + i * capturedInnerSize + inner;
9166-
expSum = numOps.Add(expSum, taylorExpValues[flatIdx]);
9166+
expSum = numOps.Add(expSum, taylorExpValues.GetFlat(flatIdx));
91679167
}
91689168

91699169
// Softmax-style Jacobian: s_i * (δ_ij - s_j)
@@ -9172,19 +9172,19 @@ void BackwardFunction(Tensor<T> gradient)
91729172
{
91739173
int flatIdx = outer * capturedAxisSize * capturedInnerSize + i * capturedInnerSize + inner;
91749174
dotProduct = numOps.Add(dotProduct,
9175-
numOps.Multiply(gradient[flatIdx], result[flatIdx]));
9175+
numOps.Multiply(gradient.GetFlat(flatIdx), result.GetFlat(flatIdx)));
91769176
}
91779177

91789178
for (int i = 0; i < capturedAxisSize; i++)
91799179
{
91809180
int flatIdx = outer * capturedAxisSize * capturedInnerSize + i * capturedInnerSize + inner;
91819181
// Softmax gradient part: s_i * (grad_i - dot(grad, s))
9182-
var softmaxGrad = numOps.Multiply(result[flatIdx],
9183-
numOps.Subtract(gradient[flatIdx], dotProduct));
9182+
var softmaxGrad = numOps.Multiply(result.GetFlat(flatIdx),
9183+
numOps.Subtract(gradient.GetFlat(flatIdx), dotProduct));
91849184

91859185
// Taylor exp derivative: d/dx[1 + x + x²/2! + ... + x^n/n!] = 1 + x + ... + x^(n-1)/(n-1)!
91869186
// This is Taylor_{n-1}(x) for exp
9187-
var x = a.Value[flatIdx];
9187+
var x = a.Value.GetFlat(flatIdx);
91889188
var taylorExpDeriv = numOps.One;
91899189
var xPower = numOps.One;
91909190
for (int n = 1; n < capturedOrder; n++)
@@ -9197,9 +9197,9 @@ void BackwardFunction(Tensor<T> gradient)
91979197
// For y_i = g(x_i) / sum_j(g(x_j)), the chain rule requires:
91989198
// grad_x_i = softmaxGrad * g'(x_i) / g(x_i)
91999199
// where g is the Taylor approximation of exp
9200-
var gVal = taylorExpValues[flatIdx];
9200+
var gVal = taylorExpValues.GetFlat(flatIdx);
92019201
var gPrimeOverG = numOps.Divide(taylorExpDeriv, gVal);
9202-
gradA[flatIdx] = numOps.Multiply(softmaxGrad, gPrimeOverG);
9202+
gradA.SetFlat(flatIdx, numOps.Multiply(softmaxGrad, gPrimeOverG));
92039203
}
92049204
}
92059205
}
