From 3d3d6536fdd09c6657ca06875cca4201bf2f99a2 Mon Sep 17 00:00:00 2001 From: Christian Helgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Tue, 30 Sep 2025 16:04:22 -0700 Subject: [PATCH 01/10] init branch --- src/Three.TSL.js | 3 +++ src/nodes/math/MathNode.js | 46 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/src/Three.TSL.js b/src/Three.TSL.js index e31e626a38a2b9..529be9a5798632 100644 --- a/src/Three.TSL.js +++ b/src/Three.TSL.js @@ -80,6 +80,7 @@ export const batch = TSL.batch; export const bentNormalView = TSL.bentNormalView; export const billboarding = TSL.billboarding; export const bitAnd = TSL.bitAnd; +export const bitCount = TSL.bitCount; export const bitNot = TSL.bitNot; export const bitOr = TSL.bitOr; export const bitXor = TSL.bitXor; @@ -183,6 +184,8 @@ export const expression = TSL.expression; export const faceDirection = TSL.faceDirection; export const faceForward = TSL.faceForward; export const faceforward = TSL.faceforward; +export const findLSB = TSL.findLSB; +export const findMSB = TSL.findMSB; export const float = TSL.float; export const floatBitsToInt = TSL.floatBitsToInt; export const floatBitsToUint = TSL.floatBitsToUint; diff --git a/src/nodes/math/MathNode.js b/src/nodes/math/MathNode.js index 8271477e82f6ec..ca221ec1c0e671 100644 --- a/src/nodes/math/MathNode.js +++ b/src/nodes/math/MathNode.js @@ -364,6 +364,9 @@ MathNode.FWIDTH = 'fwidth'; MathNode.TRANSPOSE = 'transpose'; MathNode.DETERMINANT = 'determinant'; MathNode.INVERSE = 'inverse'; +MathNode.COUNT_TRAILING_ZEROS = 'countTrailingZeros'; +MathNode.COUNT_LEADING_ZEROS = 'countLeadingZeros'; +MathNode.COUNT_ONE_BITS = 'countOneBits'; // 2 inputs @@ -1099,10 +1102,50 @@ export const atan2 = ( y, x ) => { // @deprecated, r172 }; + +/** + * Finds the number of consecutive 0 bits from the least significant bit of the input value, + * which is also the index of the least significant bit of the input value. 
+ * + * Can only be used with {@link WebGPURenderer} and a WebGPU backend. + * + * @tsl + * @function + * @param {Node | number} x - The input value. + * @returns {Node} + */ +export const countTrailingZeros = /*@__PURE__*/ nodeProxyIntent( MathNode, MathNode.COUNT_TRAILING_ZEROS ).setParameterLength( 1 ); + +/** + * Finds the number of consecutive 0 bits starting from the most significant bit of the input value. + * + * Can only be used with {@link WebGPURenderer} and a WebGPU backend. + * + * @tsl + * @function + * @param {Node | number} x - The input value. + * @returns {Node} + */ +export const countLeadingZeros = /*@__PURE__*/ nodeProxyIntent( MathNode, MathNode.COUNT_LEADING_ZEROS ).setParameterLength( 1 ); + +/** + * Finds the number of '1' bits set in the input value + * + * Can only be used with {@link WebGPURenderer} and a WebGPU backend. + * + * @tsl + * @function + * @returns {Node} + */ +export const countOneBits = /*@__PURE__*/ nodeProxyIntent( MathNode, MathNode.COUNT_ONE_BITS ).setParameterLength( 1 ); + // GLSL alias function export const faceforward = faceForward; export const inversesqrt = inverseSqrt; +export const findLSB = countTrailingZeros; +export const findMSB = countLeadingZeros; +export const bitCount = countOneBits; // Method chaining @@ -1165,3 +1208,6 @@ addMethodChaining( 'transpose', transpose ); addMethodChaining( 'determinant', determinant ); addMethodChaining( 'inverse', inverse ); addMethodChaining( 'rand', rand ); +addMethodChaining( 'countTrailingZeros', countTrailingZeros ); +addMethodChaining( 'countLeadingZeros', countLeadingZeros ); +addMethodChaining( 'countOneBits', countOneBits ); From 9ef111a43016b4dec57f71914630acc2fb0065f9 Mon Sep 17 00:00:00 2001 From: Christian Helgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Wed, 1 Oct 2025 15:33:14 -0700 Subject: [PATCH 02/10] sketch out solve work on src use setup approach and register functions that need to be created bitcountNode revert glslNodeBuilder changes 
lint-fix revert unintended changes to glslNodeBuilder and MathNode, change bitCount helper function names remove extra _include change mainLayout function to accurately reflect WGSL documentation (component wise bitCounts rather than accumulative bitCounts fix nodeType init branch fix prefix sum from other branch eod changes validate reduction, fix display/debug issues fix prefix sum errors fix fix spine scan and work on fixing downsweep add comments work work working prefix sum --- examples/jsm/gpgpu/BitonicSort.js | 4 +- examples/jsm/gpgpu/PrefixSum.js | 967 ++++++++++++++++++++++++ examples/webgpu_compute_prefix_sum.html | 339 +++++++++ src/Three.TSL.js | 1 + src/nodes/core/IndexNode.js | 9 + src/nodes/core/NodeBuilder.js | 7 + src/nodes/math/BitcountNode.js | 46 ++ src/nodes/math/MathNode.js | 46 -- 8 files changed, 1371 insertions(+), 48 deletions(-) create mode 100644 examples/jsm/gpgpu/PrefixSum.js create mode 100644 examples/webgpu_compute_prefix_sum.html diff --git a/examples/jsm/gpgpu/BitonicSort.js b/examples/jsm/gpgpu/BitonicSort.js index 2a1175c198a23d..4059c23b02adca 100644 --- a/examples/jsm/gpgpu/BitonicSort.js +++ b/examples/jsm/gpgpu/BitonicSort.js @@ -79,9 +79,9 @@ export class BitonicSort { /** * Constructs a new light probe helper. * - * @param {Renderer} renderer - The current scene's renderer. + * @param {Renderer} renderer - A renderer with the ability to execute compute operations. * @param {StorageBufferNode} dataBuffer - The data buffer to sort. - * @param {Object} [options={}] - Options that modify the bitonic sort. + * @param {Object} [options={}] - Options that modify the behavior of the bitonic sort. 
*/ constructor( renderer, dataBuffer, options = {} ) { diff --git a/examples/jsm/gpgpu/PrefixSum.js b/examples/jsm/gpgpu/PrefixSum.js new file mode 100644 index 00000000000000..f88d56bed84636 --- /dev/null +++ b/examples/jsm/gpgpu/PrefixSum.js @@ -0,0 +1,967 @@ +import { Fn, If, instancedArray, invocationLocalIndex, countTrailingZeros, Loop, workgroupArray, subgroupSize, workgroupBarrier, workgroupId, uint, select, invocationSubgroupIndex, dot, uvec4, vec4, float, subgroupAdd, array, subgroupShuffle, subgroupInclusiveAdd, subgroupBroadcast, invocationSubgroupMetaIndex, arrayBuffer } from 'three/tsl'; + +const divRoundUp = ( size, part_size ) => { + + return Math.floor( ( size + part_size - 1 ) / part_size ); + +}; + +let id = 0; + +/** + * Storage buffers needed to execute a reduce-then-scan prefix sum`. + * + * @typedef {Object} PrefixSumStorageObjects + * @property {StorageBufferNode} reductionBuffer - Storage data buffer holding the reduction of each workgroup from the reduce step. + * @property {StorageBufferNode} dataBuffer - Storage data buffer holding the vectorized input data. + * @property {StorageBufferNode} unvectorizedDataBuffer - Storage data buffer holding the unvectorized input data. + * @property {StorageBufferNode} outputBuffer - Storage data buffer that returns the unvectorized output data of the prefix sum. + */ + +/** + * Compute functions needed to execute a reduce-then-scan prefix sum`. + * + * @typedef {Object} PrefixSumComputeFunctions + * @property {ComputeNode} reduceFn - A compute shader that executes the reduce step of a reduce-then-scan prefix sum. + * @property {ComputeNode} spineScanFn - A compute shader that executes the spine scan step of a reduce-then-scan prefix sum. + * @property {ComputeNode} downsweepFn - A compute shader that executes the downsweep step of a reduce-then-scan prefix sum. + */ + +/** + * Utility nodes used in multiple shaders across the reduce-then-scan prefix sum`. 
+ * + * @typedef {Object} PrefixSumUtilityNodes + * @property {WorkgroupInfoNode} subgroupReductionArray - A workgroup memory buffer representing a workgroup scoped buffer that holds the result of a subgroup operation from each subgroup in a workgroup. Sized to account for minimumn WGSL subgroup size of 4. + * @property {Node} workgroupOffset - A node representing the vec4-alligned offset at which the workgroup with index 'workgroupId.x' will begin reading vec4 elements from the data buffer. + * @property {Node} subgroupOffset - A node representing the vec4-alligned offset from 'this.workgroupOffset' at which the subgroup with index 'subgroupMetaRank' will begin reading vec4 elements from a data buffer. + * @property {Node} unvectorizedSubgroupOffset - A node representing the uint-alligned offset from 'this.workgroupOffset' at which the subgroup with index 'subgroupMetaRank' will begin reading uint elements from a data buffer. + * @property {Node} subgroupSizeLog - A node that evaulates to n in 2^n = subgroupSize. + * @property {Node} spineSize - A node that calculates the number of partial reductions in a workgroup scan, or the number of subgroups in a workgroup on the current device. + * @property {Node} spineSizeLog - A node that evaluates to n in 2^n = spineSize. + */ + + +/** + * A class that represents a prefix sum running under the reduce/scan strategy. + * Currently limited to one-dimensional data buffers. + * + * @param {Renderer} renderer - A renderer with the ability to execute compute operations. + * @param {StorageBufferNode} dataBuffer - The data buffer to sum. + * @param {Object} [options={}] - Options that modify the reduce/scan prefix sum. + */ +export class PrefixSum { + + /** + * Constructs a new light probe helper. + * + * @param {Renderer} renderer - A renderer with the ability to execute compute operations. + * @param {number[]} inputArray - The data buffer to sum. 
+ * @param {'uint' | 'float'} inputArrayType - Type of input array + * @param {Object} [options={}] - Options that modify the behavior of the prefix sum. + */ + constructor( renderer, inputArray, inputArrayType, options = {} ) { + + /** + * A reference to the renderer. + * + * @type {Renderer} + */ + this.renderer = renderer; + + /** + * @type {PrefixSumStorageObjects} + */ + this.storageBuffers = {}; + + /** + * @type {PrefixSumComputeFunctions} + */ + this.computeFunctions = {}; + + + /** + * @type {PrefixSumUtilityNodes} + */ + this.utilityNodes = {}; + + this.type = inputArrayType; + this.vecType = inputArrayType === 'uint' ? 'uvec4' : 'vec4'; + + /** + * The size of the data. + * + * @type {number} + */ + this.count = inputArray.length; + + /** + * The number of 4-dimensional vectors needed to fully represent the data in the data buffer. + * Buffers where this.count % 4 !== 0 will need an additional vec4 to hold the data buffer's + * remaining elements. + * + * @type {number} + */ + this.vecCount = divRoundUp( this.count, 4 ); + + while ( inputArray.length % 4 !== 0 ) { + + inputArray.push( 0 ); + + } + + /** + * The number of 4-dimensional vectors that will be read from global storage in each invocation of the reduction/downsweep step. + * Defaults to 4. + * + * @type {number} + */ + this.workPerInvocation = options.workPerInvocation ? options.workPerInvocation : 4; + + /** + * The number of unvectorized values to be read from the reduction buffer in each invocation of the spine/scan step. + * Derived from workPerInvocation and thus defaults to 16. + * + * @type {number} + */ + this.unvectorizedWorkPerInvocation = this.workPerInvocation * 4; + + /** + * The workgroup size of the compute shaders executed during the prefix sum. + * If no workgroupSize is defined, the workgroupSize defaults to the minimumn between the number of elements in the + * data buffer and 64. + * + * @type {number} + */ + this.workgroupSize = options.workgroupSize ? 
options.workgroupSize : Math.min( this.vecCount, 64 ); + + /** + * The maximum number of elements that will be read by an individual workgroup in the reduction step. + * Calculated as the number of invocations in the workgroup by the work per invocation by VEC4_SIZE + * + * @type {number} + */ + this.partitionSize = this.workgroupSize * this.unvectorizedWorkPerInvocation; + + /** + * The number of workgroups needed to properly execute the reduction and downsweepsteps. + * Calculated as the number of partitions within the count of elements. + * + * @type {number} + */ + this.numWorkgroups = divRoundUp( this.count, this.partitionSize ); + + /** + * The number of invocations dispatched in each step of the prefix sum. + * + * @type {number} + */ + this.dispatchSize = this.numWorkgroups * this.workgroupSize; + + this._createStorageBuffers( inputArray, inputArrayType, this.vecType, this.numWorkgroups ); + this._createUtilityNodes(); + + /** + * The step of the prefix sum to execute. + * + * @type {'Reduce' | 'Spine_Scan' | 'Downsweep'} + */ + this.currentStep = 'Reduce'; + + + this.computeFunctions.reduceFn = this._getReduceFn(); + this.computeFunctions.spineScanFn = this._getSpineScanFn(); + this.computeFunctions.downsweepFn = this._getDownsweepFn(); + + id += 1; + + } + + _createStorageBuffers( inputArray ) { + + this.arrayBuffer = this.type === 'uint' ? 
Uint32Array.from( inputArray ) : Float32Array.from( inputArray ); + + this.storageBuffers.unvectorizedDataBuffer = instancedArray( this.arrayBuffer, this.type ).setPBO( true ).setName( `Prefix_Sum_Input_Unvec_${id}` ); + this.storageBuffers.dataBuffer = instancedArray( this.arrayBuffer, this.vecType ).setPBO( true ).setName( `Prefix_Sum_Input_Vec_${id}` ); + this.storageBuffers.outputBuffer = instancedArray( this.arrayBuffer, this.vecType ).setName( `Prefix_Sum_Output_${id}` ); + this.storageBuffers.reductionBuffer = instancedArray( this.numWorkgroups, this.type ).setPBO( true ).setName( `Prefix_Sum_Reduction_${id}` ); + + } + + _createUtilityNodes() { + + this.utilityNodes.subgroupReductionArray = workgroupArray( this.type, Math.ceil( this.workgroupSize / 4 ) ); + this.utilityNodes.workgroupOffset = workgroupId.x.mul( uint( this.workgroupSize ).mul( this.workPerInvocation ) ).toVar( 'workgroupOffset' ); + this.utilityNodes.subgroupOffset = invocationSubgroupMetaIndex.mul( subgroupSize ).mul( this.workPerInvocation ).toVar( 'subgroupOffset' ); + this.utilityNodes.unvectorizedSubgroupOffset = invocationSubgroupMetaIndex.mul( subgroupSize ).mul( this.unvectorizedWorkPerInvocation ).toVar( 'unvectorizedSubgroupOffset' ); + this.utilityNodes.subgroupSizeLog = countTrailingZeros( subgroupSize ).toVar( 'subgroupSizeLog' ); + this.utilityNodes.spineSize = uint( this.workgroupSize ).shiftRight( this.utilityNodes.subgroupSizeLog ).toVar( 'spineSize' ); + this.utilityNodes.spineSizeLog = countTrailingZeros( this.utilityNodes.spineSize ).toVar( 'spineSizeLog' ); + + } + + _getSubgroupAlignedSize() { + + const { spineSizeLog, subgroupSizeLog } = this.utilityNodes; + + // Align size to powers of subgroupSize + const squaredSubgroupLog = ( spineSizeLog.add( subgroupSizeLog ).sub( 1 ) ); + squaredSubgroupLog.divAssign( subgroupSizeLog ); + squaredSubgroupLog.mulAssign( subgroupSizeLog ); + const subgroupAlignedSize = ( uint( 1 ).shiftLeft( squaredSubgroupLog ) ).toVar( 
'subgroupAlignedSize' ); + + return subgroupAlignedSize; + + } + + + // NOTE: subgroupSizeLog needs to be defined in this._getSubgroupAlignedSize before this block can execute + _subgroupAlignedSizeBlock( subgroupAlignedSize, subgroupAllignedBlockCallback ) { + + // In cases where the number of subgroups in a workgroup is greater than the subgroup size itself, + // we need to iterate over the array again to capture all the data in the workgroup array buffer + // In many cases this loop will only run once + Loop( { start: subgroupSize, end: subgroupAlignedSize, condition: '<=', name: 'j', type: 'uint', update: '<<= subgroupSizeLog' }, ( { j } ) => { + + subgroupAllignedBlockCallback( j ); + + } ); + + } + + _getSpineAlignedSize() { + + const { numWorkgroups, partitionSize } = this; + + const SPINE_PARTITION_SIZE = uint( partitionSize ).toVar( 'spinePartitionSize' ); + + const spineAlignedSize = ( SPINE_PARTITION_SIZE.add( numWorkgroups ).sub( 1 ) ).toVar( 'spineAlignedSize' ); + spineAlignedSize.divAssign( SPINE_PARTITION_SIZE ); + spineAlignedSize.mulAssign( SPINE_PARTITION_SIZE ); + + return spineAlignedSize; + + } + + _getSpineAlignedBlock( spineAlignedSize, spineAlignedBlockCallback ) { + + // Allignment in cases where num elements is (SPINE_PARTITION_SIZE * SPINE_PARTITION_SIZE) + 1 + Loop( { start: 0, end: spineAlignedSize, condition: '<', name: 'j', type: 'uint', update: '+= spinePartitionSize' }, ( { j } ) => { + + spineAlignedBlockCallback( j ); + + } ); + + } + + _workPerInvocationBlock( workgroupCallback, lastWorkgroupCallback ) { + + const { numWorkgroups, workPerInvocation } = this; + + // Each thread will accumulate values from across 'workPerInvocation' subgroups + If( workgroupId.x.lessThan( uint( numWorkgroups ).sub( 1 ) ), () => { + + Loop( { + start: uint( 0 ), + end: workPerInvocation, + type: 'uint', + condition: '<', + name: 'currentSubgroupInBlock' + }, ( { currentSubgroupInBlock } ) => { + + workgroupCallback( currentSubgroupInBlock ); + + } 
); + + } ); + + // Ensure that the last workgroup does not access out of bounds indices + If( workgroupId.x.equal( uint( numWorkgroups ).sub( 1 ) ), () => { + + Loop( { + start: uint( 0 ), + end: workPerInvocation, + type: 'uint', + condition: '<', + name: 'currentSubgroupInBlock' + }, ( { currentSubgroupInBlock } ) => { + + lastWorkgroupCallback( currentSubgroupInBlock ); + + } ); + + } ); + + } + + /** + * Create the compute shader that performs the reduce operation. + * + * @private + * @returns {ComputeNode} - A compute shader that executes a full local swap. + */ + _getReduceFn() { + + const { reductionBuffer, dataBuffer } = this.storageBuffers; + const { vecCount } = this; + const { subgroupSizeLog, subgroupReductionArray, subgroupOffset, workgroupOffset, spineSize } = this.utilityNodes; + + const fnDef = Fn( () => { + + // Each subgroup block scans across 4 subgroups. So when we move into a new subgroup, + // align that subgroups' accesses to the next 4 subgroups + const threadSubgroupOffset = subgroupOffset.add( invocationSubgroupIndex ).toVar( 'threadSubgroupOffset' ); + + const startThreadBase = threadSubgroupOffset.add( workgroupOffset ).toVar( 'startThreadBase' ); + + const startThread = startThreadBase.toVar( 'startThread' ); + + let subgroupReduction; + + if ( this.type === 'uint' ) { + + subgroupReduction = uint( 0 ); + + } else { + + subgroupReduction = float( 0 ); + + } + + this._workPerInvocationBlock( () => { + + // Get vectorized element from input array + const val = dataBuffer.element( startThread ); + + + // Sum values within vec4 together by using result of dot product + if ( this.vecType === 'uvec4' ) { + + subgroupReduction.addAssign( dot( uvec4( 1 ), val ) ); + + } else { + + subgroupReduction.addAssign( dot( vec4( 1 ), val ) ); + + } + + // Increment so thread will scan value in next subgroup + startThread.addAssign( subgroupSize ); + + + }, () => { + + let val; + if ( this.vecType === 'uvec4' ) { + + // Ensure index is less than number 
of available vectors in inputBuffer + val = select( startThread.lessThan( uint( vecCount ) ), dataBuffer.element( startThread ), uvec4( 0 ) ).uniformFlow(); + + subgroupReduction.addAssign( dot( val, uvec4( 1 ) ) ); + + } else { + + // Ensure index is less than number of available vectors in inputBuffer + val = select( startThread.lessThan( uint( vecCount ) ), dataBuffer.element( startThread ), vec4( 0 ) ).uniformFlow(); + + subgroupReduction.addAssign( dot( val, vec4( 1 ) ) ); + + + } + + startThread.addAssign( subgroupSize ); + + } ); + + subgroupReduction.assign( subgroupAdd( subgroupReduction ) ); + + // Assuming that each element in the input buffer is 1, we generally expect each invocation's subgroupReduction + // value to be ELEMENTS_PER_VEC4 * workPerInvocation * subgroupSize + + // Delegate one thread per subgroup to assign each subgroup's reduction to the workgroup array + If( invocationSubgroupIndex.equal( uint( 0 ) ), () => { + + subgroupReductionArray.element( invocationSubgroupMetaIndex ).assign( subgroupReduction ); + + } ); + + // Ensure that each workgroup has populated the perSubgroupReductionArray with data + // from each of it's subgroups + workgroupBarrier(); + + // WORKGROUP LEVEL REDUCE + + const subgroupAlignedSize = this._getSubgroupAlignedSize(); + + // aligned size 2 * 4 + + const offset = uint( 0 ); + + // In cases where the number of subgroups in a workgroup is greater than the subgroup size itself, + // we need to iterate over the array again to capture all the data in the workgroup array buffer + // In many cases this loop will only run once + this._subgroupAlignedSizeBlock( subgroupAlignedSize, () => { + + const subgroupIndex = ( ( invocationLocalIndex.add( 1 ) ).shiftLeft( offset ) ).sub( 1 ); + + const isValidSubgroupIndex = subgroupIndex.lessThan( spineSize ).toVar( 'isValidSubgroupIndex' ); + + // Reduce values within the local workgroup memory. + // Set toVar to ensure subgroupAdd executes before (not within) the if statement. 
+ const t = subgroupAdd( + select( + isValidSubgroupIndex, + subgroupReductionArray.element( subgroupIndex ), + 0 + ).uniformFlow() + ).toVar( 't' ); + + // Can assign back to workgroupArray since all + // subgroup threads work in lockstop for subgroupAdd + If( isValidSubgroupIndex, () => { + + subgroupReductionArray.element( subgroupIndex ).assign( t ); + + } ); + + // Ensure all threads have completed work + + workgroupBarrier(); + + offset.addAssign( subgroupSizeLog ); + + } ); + + // Assign single thread from workgroup to assign workgroup reduction + If( invocationLocalIndex.equal( uint( 0 ) ), () => { + + const reducedWorkgroupSum = subgroupReductionArray.element( uint( spineSize ).sub( 1 ) ); + + // TODO: Comment out in prod + // dataBuffer.element( workgroupId.x ).assign( reducedWorkgroupSum ); + + reductionBuffer.element( workgroupId.x ).assign( reducedWorkgroupSum ); + + } ); + + } )().compute( this.dispatchSize, [ this.workgroupSize ] ); + + return fnDef; + + } + + /** + * Executes a downsweep operation on the data buffer. + * + * @param {Node} inputNode - The input node. + * @param {Node | number} maskNode - The number of bits to mask. + * @return {Node} + */ + _maskLowerBits( inputNode, maskNode ) { + + return ( inputNode.shiftRight( maskNode ) ).shiftLeft( maskNode ); + + } + + + /** + * Create the compute shader that performs the spine scan operation. + * + * @private + * @returns {ComputeNode} - A compute shader that executes a full local swap. 
+ */ + _getSpineScanFn() { + + const { reductionBuffer } = this.storageBuffers; + const { subgroupReductionArray, unvectorizedSubgroupOffset, spineSize, subgroupSizeLog } = this.utilityNodes; + const { unvectorizedWorkPerInvocation } = this; + + const fnDef = Fn( () => { + + const subgroupAlignedSize = this._getSubgroupAlignedSize(); + const spineAlignedSize = this._getSpineAlignedSize(); + + const t_scan = array( 'uint', 16 ).toVar(); + const previousReduction = uint( 0 ).toVar( 'previousReduction' ); + + const s_offset = unvectorizedSubgroupOffset.add( invocationSubgroupIndex ).toVar( 's_offset' ); + + this._getSpineAlignedBlock( spineAlignedSize, ( devOffset ) => { + + const reducedWorkgroupIndex = s_offset.add( devOffset ); + + Loop( { + start: uint( 0 ), + end: uint( unvectorizedWorkPerInvocation ), + type: 'uint', + condition: '<', + name: 'k' + }, ( { k } ) => { + + // The reduction buffer holds a collection of reductions from within + // each indice's respective workgroup, so ensure that we only access + // valid workgroup indices + + If( reducedWorkgroupIndex.lessThan( this.numWorkgroups ), () => { + + t_scan.element( k ).assign( reductionBuffer.element( reducedWorkgroupIndex ) ); + + } ); + + reducedWorkgroupIndex.addAssign( subgroupSize ); + + } ); + + const prev = uint( 0 ).toVar( 'prev' ); + Loop( { + start: uint( 0 ), + end: uint( unvectorizedWorkPerInvocation ), + type: 'uint', + condition: '<', + update: '+= 1u', + name: 'k' + }, ( { k } ) => { + + const tScanElement = t_scan.element( k ); + + tScanElement.assign( subgroupInclusiveAdd( tScanElement ).add( prev ) ); + prev.assign( subgroupShuffle( tScanElement, subgroupSize.sub( 1 ) ) ); + + } ); + + if ( invocationSubgroupIndex.equal( subgroupSize.sub( 1 ) ) ) { + + subgroupReductionArray.element( invocationSubgroupMetaIndex ).assign( prev ); + + } + + workgroupBarrier(); + + const offset0 = uint( 0 ).toVar(); + const offset1 = uint( 0 ).toVar(); + + this._subgroupAlignedSizeBlock( 
subgroupAlignedSize, ( j ) => { + + const isValidSubgroupIndex = j.notEqual( subgroupSize ); + const isValidSubgroupInt = select( isValidSubgroupIndex, uint( 1 ), uint( 0 ) ).uniformFlow(); + + const i0 = ( invocationLocalIndex.add( offset0 ) ).shiftLeft( offset1 ).sub( isValidSubgroupInt ); + const pred0 = i0.lessThan( spineSize ); + + // Need to cast toVar() here otherwise subgroupInclusiveAdd gets inlined within a non-uniform block + const t0 = subgroupInclusiveAdd( select( pred0, subgroupReductionArray.element( i0 ), uint( 0 ) ).uniformFlow() ).toVar(); + + If( pred0, () => { + + subgroupReductionArray.element( i0 ).assign( t0 ); + + } ); + + If( isValidSubgroupIndex, () => { + + const rShift = j.shiftRight( subgroupSizeLog ); + const i1 = invocationLocalIndex.add( rShift ); + + const weirdValue = i1.bitAnd( j.sub( 1 ) ); + + If( weirdValue.greaterThanEqual( rShift ), () => { + + const pred1 = i1.lessThan( spineSize ); + + const t1 = select( pred1, subgroupReductionArray.element( this._maskLowerBits( i1, offset1 ).sub( 1 ) ), 0 ).uniformFlow(); + + If( + pred1.and( + ( i1.add( 1 ).bitAnd( rShift.sub( 1 ) ) ).notEqual( 0 ) + ), () => { + + subgroupReductionArray.element( i1 ).addAssign( t1 ); + + } ); + + + } ); + + + } ).Else( () => { + + offset0.addAssign( 1 ); + + } ); + + offset1.addAssign( subgroupSizeLog ); + + } ); + + workgroupBarrier(); + + const lastSubgroupReduction = select( + invocationSubgroupMetaIndex.notEqual( 0 ), + subgroupReductionArray.element( invocationSubgroupMetaIndex.sub( 1 ) ), + uint( 0 ) + ).uniformFlow(); + + const newPrev = lastSubgroupReduction.add( previousReduction ); + + const i = s_offset.add( devOffset ); + + Loop( { + start: uint( 0 ), + end: uint( unvectorizedWorkPerInvocation ), + type: 'uint', + condition: '<', + name: 'k' + }, ( { k } ) => { + + If( i.lessThan( this.numWorkgroups ), () => { + + reductionBuffer.element( i ).assign( t_scan.element( k ).add( newPrev ) ); + + } ); + + i.addAssign( subgroupSize ); + + + } ); + 
+ previousReduction.addAssign( subgroupBroadcast( subgroupReductionArray.element( subgroupAlignedSize.sub( 1 ) ), 0 ) ); + workgroupBarrier(); + + } ); + + } )().compute( this.numWorkgroups, [ this.workgroupSize ] ); + + console.log( fnDef ); + + return fnDef; + + } + + _getDownsweepFn() { + + const { dataBuffer, reductionBuffer, outputBuffer } = this.storageBuffers; + const { vecType } = this; + const { subgroupOffset, workgroupOffset, subgroupReductionArray, subgroupSizeLog, spineSize } = this.utilityNodes; + + const { workPerInvocation, vecCount } = this; + + const fnDef = Fn( () => { + + const threadSubgroupOffset = subgroupOffset.add( invocationSubgroupIndex ); + + const startThreadBase = threadSubgroupOffset.add( workgroupOffset ); + + const startThread = startThreadBase.toVar(); + + const vec4FilledWithZeroArray = []; + + for ( let i = 0; i < workPerInvocation; i ++ ) { + + vec4FilledWithZeroArray.push( uvec4( 0 ) ); + + } + + const tScan = array( vec4FilledWithZeroArray ).toVar(); + + // Prefix Sum elements within individual vec4 elements + + this._workPerInvocationBlock( ( currentSubgroupInBlock ) => { + + const scanIn = dataBuffer.element( startThread ); + const currentTScanElement = tScan.element( currentSubgroupInBlock ); + + console.log( currentTScanElement ); + + currentTScanElement.assign( scanIn ); + + currentTScanElement.y.addAssign( currentTScanElement.x ); + currentTScanElement.z.addAssign( currentTScanElement.y ); + currentTScanElement.w.addAssign( currentTScanElement.z ); + + startThread.addAssign( subgroupSize ); + + }, ( currentSubgroupInBlock ) => { + + If( startThread.lessThan( uint( vecCount ) ), () => { + + const scanIn = dataBuffer.element( startThread ); + const currentTScanElement = tScan.element( currentSubgroupInBlock ); + + currentTScanElement.assign( scanIn ); + + currentTScanElement.y.addAssign( currentTScanElement.x ); + currentTScanElement.z.addAssign( currentTScanElement.y ); + currentTScanElement.w.addAssign( 
currentTScanElement.z ); + + startThread.addAssign( subgroupSize ); + + } ); + + } ); + + // Each thread now has prefix sums of the elements in 'workPerInvocation' vec4s + + const prev = uint( 0 ).toVar(); + + const laneMask = subgroupSize.sub( 1 ).toVar( 'laneMask' ); + const clockwiseShift = ( invocationSubgroupIndex.add( laneMask ) ).bitAnd( laneMask ).toVar( 'clockwiseShift' ); + + Loop( { + start: uint( 0 ), + end: uint( workPerInvocation ), + type: 'uint', + condition: '<', + name: 'currentSubgroupInBlock' + }, ( { currentSubgroupInBlock } ) => { + + + // previous greatest accumulated value + const prevAccGreatestValue = subgroupShuffle( + + // Get the largest element within each vector (always w since prefix sum) + // Then add together with the same element in each lane of the subgroup. + // Assume all values in data buffer are 1 and subgroupSize is 4 + // Subgroup 0, 1, 2, 3 values -> 4 + // Invocation 0 value after inclusiveAdd 4 + // Invocation 1 value after inclusiveAdd 8 + // Invocation 2 value after inclusiveAdd 12 + // Invocation 3 value after inclusiveAdd 16 + + subgroupInclusiveAdd( tScan.element( currentSubgroupInBlock ).w ), + + // Shuffle each value between lanes in the subgroup counterClockWise + // Effectively a looping subgroupShuffleDown + // Inv 0 gets inv 3 value 16 + // Invocation 1 gets inv 0 value 4 + // Invocation 2 gets inv 1 value 8 + // Invocation 3 gets inv 2 value 12 + + clockwiseShift + ).toVar( 'prevAccGreatestValue' ); + + const isNotInvocationSubgroupIndex0 = invocationSubgroupIndex.notEqual( uint( 0 ) ); + + let addEle; + + // Vector read by lane 0 does not get changed by since it is already prefix summed + // within context of its subgroup, so we don't want to add greatest value for it. + // The purpose of shuffling to all lanes of the subgroup including lane 0 is simply + // to have the greatest value accessible for the broadcast from lane 0. 
+ + if ( this.vecType === 'uvec4' ) { + + addEle = prev.add( select( isNotInvocationSubgroupIndex0, prevAccGreatestValue, uvec4( 0 ) ).uniformFlow() ); + + } else { + + addEle = prev.add( select( isNotInvocationSubgroupIndex0, prevAccGreatestValue, vec4( 0 ) ).uniformFlow() ); + + } + + tScan.element( currentSubgroupInBlock ).addAssign( addEle ); + + // Broadcast value of invocationSubgroupIndex 0 (which is usually largest value ) to prev + prev.addAssign( subgroupBroadcast( prevAccGreatestValue, uint( 0 ) ) ); + + } ); + + If( invocationSubgroupIndex.equal( uint( 0 ) ), () => { + + subgroupReductionArray.element( invocationSubgroupMetaIndex ).assign( prev ); + + } ); + + workgroupBarrier(); + + + const offset0 = uint( 0 ).toVar(); + const offset1 = uint( 0 ).toVar(); + + + const subgroupAlignedSize = this._getSubgroupAlignedSize(); + + // In cases where the number of subgroups in a workgroup is greater than the subgroup size itself, + // we need to iterate over the array again to capture all the data in the workgroup array buffer + this._subgroupAlignedSizeBlock( subgroupAlignedSize, ( j ) => { + + const i0 = ( + ( invocationLocalIndex.add( offset0 ) ).shiftLeft( offset1 ) + ).sub( offset0 ); + + const pred0 = i0.lessThan( spineSize ); + + const t0 = subgroupInclusiveAdd( + select( pred0, subgroupReductionArray.element( i0 ), uint( 0 ) ).uniformFlow() + ).toVar(); + + If( pred0, () => { + + subgroupReductionArray.element( i0 ).assign( t0 ); + + } ); + + workgroupBarrier(); + + If( j.notEqual( subgroupSize ), () => { + + const rShift = j.shiftRight( subgroupSizeLog ); + const i1 = invocationLocalIndex.add( rShift ); + If( ( i1.bitAnd( j.sub( 1 ) ) ).greaterThanEqual( rShift ), () => { + + const pred1 = i1.lessThan( spineSize ); + const t1 = select( + pred1, + subgroupReductionArray.element( this._maskLowerBits( i1, offset1 ).sub( 1 ) ), + uint( 0 ) + ).uniformFlow(); + + If( + pred1.and( + ( i1.add( 1 ) ).bitAnd( rShift.sub( 1 ) ).notEqual( uint( 0 ) ) ) + , () => 
{ + + subgroupReductionArray.element( i1 ).addAssign( t1 ); + + } + ); + + } ); + + } ).Else( () => { + + offset0.addAssign( 1 ); + + } ); + + offset1.addAssign( subgroupSize ); + + } ); + + workgroupBarrier(); + + const spineScanWorkgroupReduction = select( + workgroupId.x.notEqual( uint( 0 ) ), + reductionBuffer.element( workgroupId.x.sub( 1 ) ), + uint( 0 ) + ).uniformFlow(); + + const downsweepSubgroupReduction = select( + invocationSubgroupMetaIndex.notEqual( 0 ), + subgroupReductionArray.element( invocationSubgroupMetaIndex.sub( 1 ) ), + uint( 0 ) + ).uniformFlow(); + + prev.assign( spineScanWorkgroupReduction.add( downsweepSubgroupReduction ) ); + + // LAST BLOCK + + startThread.assign( startThreadBase ); + + this._workPerInvocationBlock( ( currentSubgroupInBlock ) => { + + const sweepValue = tScan.element( currentSubgroupInBlock ).add( prev ); + outputBuffer.element( startThread ).assign( sweepValue ); + startThread.addAssign( subgroupSize ); + + }, ( currentSubgroupInBlock ) => { + + If( startThread.lessThan( uint( vecCount ) ), () => { + + const sweepValue = tScan.element( currentSubgroupInBlock ).add( prev ); + outputBuffer.element( startThread ).assign( sweepValue ); + startThread.addAssign( subgroupSize ); + + } ); + + } ); + + } )().compute( this.dispatchSize, [ this.workgroupSize ] ); + + return fnDef; + + } + + + /** + * Executes an intermediate reduction operation on the data buffer. + * + * @param {Renderer} renderer - The current scene's renderer. + */ + async computeReduce() { + + this.renderer.compute( this.computeFunctions.reduceFn ); + + } + + /** + * Executes a spine scan operation on the data buffer. + * + * @param {Renderer} renderer - The current scene's renderer. + */ + async computeSpineScan() { + + this.renderer.compute( this.computeFunctions.spineScanFn ); + + } + + /** + * Executes a downsweep operation on the data buffer. + * + * @param {Renderer} renderer - The current scene's renderer. 
+ */ + async computeDownsweep() { + + this.renderer.compute( this.computeFunctions.downsweepFn ); + + } + + /** + * Executes the next subsequent compute step of a prefix sum. + * + * @param {Renderer} renderer - A renderer with the ability to execute compute operations. + */ + async computeStep() { + + switch ( this.currentStep ) { + + case 'Reduce': { + + await this.computeReduce(); + this.currentStep = 'Spine_Scan'; + break; + + } + + case 'Spine_Scan': { + + await this.computeSpineScan(); + this.currentStep = 'Downsweep'; + break; + + } + + case 'Downsweep': { + + await this.computeDownsweep(); + this.currentStep = 'Reduce'; + break; + + } + + } + + } + + /** + * Executes a complete prefix sum on the data buffer. + * + * @param {Renderer} renderer - The current scene's renderer. + */ + async compute() { + + await this.computeStep( this.currentStep ); + await this.computeStep( this.currentStep ); + await this.computeStep( this.currentStep ); + + } + +} diff --git a/examples/webgpu_compute_prefix_sum.html b/examples/webgpu_compute_prefix_sum.html new file mode 100644 index 00000000000000..5428e09fb32b46 --- /dev/null +++ b/examples/webgpu_compute_prefix_sum.html @@ -0,0 +1,339 @@ + + + three.js webgpu - compute reduction + + + + + + +
+ three.js +
This example demonstrates a prefix sum operation on a buffer of data. +
Reference implementations are translated from the WGSL code present in GPUPrefixSums by b0nes164 +
+ + + + + + \ No newline at end of file diff --git a/src/Three.TSL.js b/src/Three.TSL.js index 529be9a5798632..9ca6dd167c08fb 100644 --- a/src/Three.TSL.js +++ b/src/Three.TSL.js @@ -238,6 +238,7 @@ export const inverseSqrt = TSL.inverseSqrt; export const inversesqrt = TSL.inversesqrt; export const invocationLocalIndex = TSL.invocationLocalIndex; export const invocationSubgroupIndex = TSL.invocationSubgroupIndex; +export const invocationSubgroupMetaIndex = TSL.invocationSubgroupMetaIndex; export const ior = TSL.ior; export const iridescence = TSL.iridescence; export const iridescenceIOR = TSL.iridescenceIOR; diff --git a/src/nodes/core/IndexNode.js b/src/nodes/core/IndexNode.js index 5908cc694d75ea..55298557d08a99 100644 --- a/src/nodes/core/IndexNode.js +++ b/src/nodes/core/IndexNode.js @@ -1,5 +1,6 @@ import Node from './Node.js'; import { nodeImmutable, varying } from '../tsl/TSLBase.js'; +import { subgroupSize } from '../gpgpu/ComputeBuiltinNode.js'; /** * This class represents shader indices of different types. The following predefined node @@ -155,6 +156,14 @@ export const invocationSubgroupIndex = /*@__PURE__*/ nodeImmutable( IndexNode, I */ export const invocationLocalIndex = /*@__PURE__*/ nodeImmutable( IndexNode, IndexNode.INVOCATION_LOCAL ); +/** + * TSL object that represents the index of a compute invocation within the scope of a subgroup. + * + * @tsl + * @type {IndexNode} + */ +export const invocationSubgroupMetaIndex = /*@__PURE__*/ invocationLocalIndex.div( subgroupSize ).toVar( 'invocationSubgroupMetaIndex' ); + /** * TSL object that represents the index of a draw call. 
* diff --git a/src/nodes/core/NodeBuilder.js b/src/nodes/core/NodeBuilder.js index 822e74871ee1e4..ef237e1798ed8f 100644 --- a/src/nodes/core/NodeBuilder.js +++ b/src/nodes/core/NodeBuilder.js @@ -1226,9 +1226,16 @@ class NodeBuilder { if ( type === 'float' || type === 'int' || type === 'uint' ) value = 0; else if ( type === 'bool' ) value = false; else if ( type === 'color' ) value = new Color(); +<<<<<<< HEAD else if ( type === 'vec2' || type === 'uvec2' || type === 'ivec2' ) value = new Vector2(); else if ( type === 'vec3' || type === 'uvec3' || type === 'ivec3' ) value = new Vector3(); else if ( type === 'vec4' || type === 'uvec4' || type === 'ivec4' ) value = new Vector4(); +======= + else if ( type === 'vec2' || type === 'uvec2' ) value = new Vector2(); + else if ( type === 'vec3' || type === 'uvec3' ) value = new Vector3(); + // vec4 defaults to (0, 0, 0, 1) + else if ( type === 'vec4' || type === 'uvec4' ) value = new Vector4(); +>>>>>>> d83ef1ebb4 (sketch out solve) } diff --git a/src/nodes/math/BitcountNode.js b/src/nodes/math/BitcountNode.js index e11f63b2810cf0..55572dd76a7a31 100644 --- a/src/nodes/math/BitcountNode.js +++ b/src/nodes/math/BitcountNode.js @@ -1,4 +1,8 @@ +<<<<<<< HEAD import { float, Fn, If, nodeProxyIntent, uint, int, uvec2, uvec3, uvec4, ivec2, ivec3, ivec4 } from '../tsl/TSLCore.js'; +======= +import { addMethodChaining, float, Fn, If, nodeProxyIntent, uint, int, uvec2, uvec3, uvec4, ivec2, ivec3, ivec4 } from '../tsl/TSLCore.js'; +>>>>>>> 277001d084 (sketch out solve) import { bitcast, floatBitsToUint } from './BitcastNode.js'; import MathNode, { negate } from './MathNode.js'; @@ -41,7 +45,10 @@ class BitcountNode extends MathNode { /** * Casts the input value of the function to an integer if necessary. * +<<<<<<< HEAD * @private +======= +>>>>>>> 277001d084 (sketch out solve) * @param {Node|Node} inputNode - The input value. * @param {Node} outputNode - The output value. * @param {string} elementType - The type of the input value. 
@@ -60,6 +67,25 @@ class BitcountNode extends MathNode { } +<<<<<<< HEAD +======= + _returnBaseDataNode( elementType ) { + + if ( elementType === 'uint' ) { + + return uint; + + } + + if ( elementType === 'int' ) { + + return int; + + } + + } + +>>>>>>> 277001d084 (sketch out solve) _returnDataNode( inputType ) { switch ( inputType ) { @@ -112,6 +138,7 @@ class BitcountNode extends MathNode { } +<<<<<<< HEAD } } @@ -124,6 +151,15 @@ class BitcountNode extends MathNode { * @param {string} elementType - The type of the input value. * @returns {Function} - The generated function */ +======= + + } + + + + } + +>>>>>>> 277001d084 (sketch out solve) _createTrailingZerosBaseLayout( method, elementType ) { const outputConvertNode = this._returnDataNode( elementType ); @@ -153,6 +189,7 @@ class BitcountNode extends MathNode { } +<<<<<<< HEAD /** * Creates and registers a reusable GLSL function that emulates the behavior of countLeadingZeros. * @@ -161,6 +198,8 @@ class BitcountNode extends MathNode { * @param {string} elementType - The type of the input value. * @returns {Function} - The generated function */ +======= +>>>>>>> 277001d084 (sketch out solve) _createLeadingZerosBaseLayout( method, elementType ) { const outputConvertNode = this._returnDataNode( elementType ); @@ -225,6 +264,7 @@ class BitcountNode extends MathNode { } +<<<<<<< HEAD /** * Creates and registers a reusable GLSL function that emulates the behavior of countOneBits. * @@ -233,12 +273,18 @@ class BitcountNode extends MathNode { * @param {string} elementType - The type of the input value. 
* @returns {Function} - The generated function */ +======= +>>>>>>> 277001d084 (sketch out solve) _createOneBitsBaseLayout( method, elementType ) { const outputConvertNode = this._returnDataNode( elementType ); const fnDef = Fn( ( [ value ] ) => { +<<<<<<< HEAD +======= + +>>>>>>> 277001d084 (sketch out solve) const v = uint( 0.0 ); this._resolveElementType( value, v, elementType ); diff --git a/src/nodes/math/MathNode.js b/src/nodes/math/MathNode.js index ca221ec1c0e671..8271477e82f6ec 100644 --- a/src/nodes/math/MathNode.js +++ b/src/nodes/math/MathNode.js @@ -364,9 +364,6 @@ MathNode.FWIDTH = 'fwidth'; MathNode.TRANSPOSE = 'transpose'; MathNode.DETERMINANT = 'determinant'; MathNode.INVERSE = 'inverse'; -MathNode.COUNT_TRAILING_ZEROS = 'countTrailingZeros'; -MathNode.COUNT_LEADING_ZEROS = 'countLeadingZeros'; -MathNode.COUNT_ONE_BITS = 'countOneBits'; // 2 inputs @@ -1102,50 +1099,10 @@ export const atan2 = ( y, x ) => { // @deprecated, r172 }; - -/** - * Finds the number of consecutive 0 bits from the least significant bit of the input value, - * which is also the index of the least significant bit of the input value. - * - * Can only be used with {@link WebGPURenderer} and a WebGPU backend. - * - * @tsl - * @function - * @param {Node | number} x - The input value. - * @returns {Node} - */ -export const countTrailingZeros = /*@__PURE__*/ nodeProxyIntent( MathNode, MathNode.COUNT_TRAILING_ZEROS ).setParameterLength( 1 ); - -/** - * Finds the number of consecutive 0 bits starting from the most significant bit of the input value. - * - * Can only be used with {@link WebGPURenderer} and a WebGPU backend. - * - * @tsl - * @function - * @param {Node | number} x - The input value. - * @returns {Node} - */ -export const countLeadingZeros = /*@__PURE__*/ nodeProxyIntent( MathNode, MathNode.COUNT_LEADING_ZEROS ).setParameterLength( 1 ); - -/** - * Finds the number of '1' bits set in the input value - * - * Can only be used with {@link WebGPURenderer} and a WebGPU backend. 
- * - * @tsl - * @function - * @returns {Node} - */ -export const countOneBits = /*@__PURE__*/ nodeProxyIntent( MathNode, MathNode.COUNT_ONE_BITS ).setParameterLength( 1 ); - // GLSL alias function export const faceforward = faceForward; export const inversesqrt = inverseSqrt; -export const findLSB = countTrailingZeros; -export const findMSB = countLeadingZeros; -export const bitCount = countOneBits; // Method chaining @@ -1208,6 +1165,3 @@ addMethodChaining( 'transpose', transpose ); addMethodChaining( 'determinant', determinant ); addMethodChaining( 'inverse', inverse ); addMethodChaining( 'rand', rand ); -addMethodChaining( 'countTrailingZeros', countTrailingZeros ); -addMethodChaining( 'countLeadingZeros', countLeadingZeros ); -addMethodChaining( 'countOneBits', countOneBits ); From f070385215a3d756627ed900f2aca59399748b88 Mon Sep 17 00:00:00 2001 From: Christian Helgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Sun, 9 Nov 2025 13:58:42 -0800 Subject: [PATCH 03/10] prefix_sum --- examples/jsm/gpgpu/BitonicSort.js | 2 +- examples/jsm/gpgpu/PrefixSum.js | 55 ++++-- examples/webgpu_compute_prefix_sum.html | 221 +++++++--------------- examples/webgpu_compute_reduce.html | 9 +- examples/webgpu_compute_sort_bitonic.html | 5 +- 5 files changed, 124 insertions(+), 168 deletions(-) diff --git a/examples/jsm/gpgpu/BitonicSort.js b/examples/jsm/gpgpu/BitonicSort.js index 4059c23b02adca..32576011a2ca16 100644 --- a/examples/jsm/gpgpu/BitonicSort.js +++ b/examples/jsm/gpgpu/BitonicSort.js @@ -119,7 +119,7 @@ export class BitonicSort { * * @type {StorageBufferNode} */ - this.workgroupSize = options.workgroupSize ? Math.min( this.dispatchSize, options.workgroupSize ) : Math.min( this.dispatchSize, 64 ); + this.workgroupSize = options.workgroupSize ? 
Math.min( this.dispatchSize, options.workgroupSize ) : Math.min( this.dispatchSize, this.renderer.backend.device.limits.maxComputeWorkgroupSizeX ); /** * A node representing a workgroup scoped buffer that holds locally sorted elements. diff --git a/examples/jsm/gpgpu/PrefixSum.js b/examples/jsm/gpgpu/PrefixSum.js index f88d56bed84636..08c518bbd5a1af 100644 --- a/examples/jsm/gpgpu/PrefixSum.js +++ b/examples/jsm/gpgpu/PrefixSum.js @@ -1,4 +1,7 @@ -import { Fn, If, instancedArray, invocationLocalIndex, countTrailingZeros, Loop, workgroupArray, subgroupSize, workgroupBarrier, workgroupId, uint, select, invocationSubgroupIndex, dot, uvec4, vec4, float, subgroupAdd, array, subgroupShuffle, subgroupInclusiveAdd, subgroupBroadcast, invocationSubgroupMetaIndex, arrayBuffer } from 'three/tsl'; +import { + StorageInstancedBufferAttribute +} from 'three'; +import { Fn, If, instancedArray, invocationLocalIndex, countTrailingZeros, Loop, workgroupArray, subgroupSize, workgroupBarrier, workgroupId, uint, select, invocationSubgroupIndex, dot, uvec4, vec4, float, subgroupAdd, array, subgroupShuffle, subgroupInclusiveAdd, subgroupBroadcast, invocationSubgroupMetaIndex, arrayBuffer, storage } from 'three/tsl'; const divRoundUp = ( size, part_size ) => { @@ -68,6 +71,12 @@ export class PrefixSum { */ this.renderer = renderer; + if ( this.renderer.backend.device === null ) { + + renderer.backend.init(); + + } + /** * @type {PrefixSumStorageObjects} */ @@ -132,7 +141,14 @@ export class PrefixSum { * * @type {number} */ - this.workgroupSize = options.workgroupSize ? options.workgroupSize : Math.min( this.vecCount, 64 ); + this.workgroupSize = options.workgroupSize ? options.workgroupSize : Math.min( this.vecCount, this.renderer.backend.device.limits.maxComputeWorkgroupSizeX ); + + /** + * The minimum subgroup size specified by the renderer's graphics device. 
+ * + * @type {number} + */ + this.minSubgroupSize = ( this.renderer.backend.device.adapterInfo && this.renderer.backend.device.adapterInfo.subgroupMinSize ) ? this.renderer.backend.device.adapterInfo.subgroupMinSize : 4; /** * The maximum number of elements that will be read by an individual workgroup in the reduction step. @@ -179,10 +195,17 @@ export class PrefixSum { _createStorageBuffers( inputArray ) { this.arrayBuffer = this.type === 'uint' ? Uint32Array.from( inputArray ) : Float32Array.from( inputArray ); + this.outputArrayBuffer = this.type === 'uint' ? Uint32Array.from( inputArray ) : Float32Array.from( inputArray ); + + const inputAttribute = new StorageInstancedBufferAttribute( this.arrayBuffer, 1 ); + const outputAttribute = new StorageInstancedBufferAttribute( this.outputArrayBuffer, 1 ); + + this.storageBuffers.dataBuffer = storage( inputAttribute, this.vecType, inputAttribute.count / 4 ).setName( `Prefix_Sum_Input_Vec_${id}` ); + this.storageBuffers.unvectorizedDataBuffer = storage( inputAttribute, this.type, inputAttribute.count ).setName( `Prefix_Sum_Input_Unvec_${id}` ); + + this.storageBuffers.outputBuffer = storage( outputAttribute, this.vecType, outputAttribute.count / 4 ).setName( `Prefix_Sum_Output_Vec_${id}` ); + this.storageBuffers.unvectorizedOutputBuffer = storage( outputAttribute, this.type, outputAttribute.count ).setName( `Prefix_Sum_Output_Unvec_${id}` ); - this.storageBuffers.unvectorizedDataBuffer = instancedArray( this.arrayBuffer, this.type ).setPBO( true ).setName( `Prefix_Sum_Input_Unvec_${id}` ); - this.storageBuffers.dataBuffer = instancedArray( this.arrayBuffer, this.vecType ).setPBO( true ).setName( `Prefix_Sum_Input_Vec_${id}` ); - this.storageBuffers.outputBuffer = instancedArray( this.arrayBuffer, this.vecType ).setName( `Prefix_Sum_Output_${id}` ); this.storageBuffers.reductionBuffer = instancedArray( this.numWorkgroups, this.type ).setPBO( true ).setName( `Prefix_Sum_Reduction_${id}` ); } @@ -472,6 +495,19 @@ export 
class PrefixSum { _getSpineScanFn() { const { reductionBuffer } = this.storageBuffers; + + if ( this.numWorkgroups <= this.minSubgroupSize ) { + + const fnDef = Fn( () => { + + reductionBuffer.element( invocationSubgroupIndex ).assign( subgroupInclusiveAdd( reductionBuffer.element( invocationSubgroupIndex ) ) ); + + } )().compute( this.numWorkgroups, [ this.workgroupSize ] ); + + return fnDef; + + } + const { subgroupReductionArray, unvectorizedSubgroupOffset, spineSize, subgroupSizeLog } = this.utilityNodes; const { unvectorizedWorkPerInvocation } = this; @@ -630,8 +666,6 @@ export class PrefixSum { } )().compute( this.numWorkgroups, [ this.workgroupSize ] ); - console.log( fnDef ); - return fnDef; } @@ -639,7 +673,6 @@ export class PrefixSum { _getDownsweepFn() { const { dataBuffer, reductionBuffer, outputBuffer } = this.storageBuffers; - const { vecType } = this; const { subgroupOffset, workgroupOffset, subgroupReductionArray, subgroupSizeLog, spineSize } = this.utilityNodes; const { workPerInvocation, vecCount } = this; @@ -958,9 +991,9 @@ export class PrefixSum { */ async compute() { - await this.computeStep( this.currentStep ); - await this.computeStep( this.currentStep ); - await this.computeStep( this.currentStep ); + await this.computeReduce(); + await this.computeSpineScan(); + await this.computeDownsweep(); } diff --git a/examples/webgpu_compute_prefix_sum.html b/examples/webgpu_compute_prefix_sum.html index 5428e09fb32b46..67cccca332a16b 100644 --- a/examples/webgpu_compute_prefix_sum.html +++ b/examples/webgpu_compute_prefix_sum.html @@ -27,7 +27,7 @@