Prevent full external tx reading and parsing when only TxInpoints are needed

oskarszoon · oskarszoon · commit a6db74d7c0aa · 2025-12-02T11:46:29.000+01:00
diff --git a/stores/utxo/aerospike/get.go b/stores/utxo/aerospike/get.go
@@ -430,6 +430,43 @@ func (s *Store) getTxFromBins(bins aerospike.BinMap) (tx *bt.Tx, err error) {
 	return tx, nil
 }
 
+// needsFullExternalTx reports whether the request contains fields.Tx or fields.Inputs, the two
+// fields that require the whole external transaction; all other fields leave it false.
+func needsFullExternalTx(requestedFields []fields.FieldName) bool {
+	return slices.ContainsFunc(requestedFields, func(name fields.FieldName) bool {
+		return name == fields.Tx || name == fields.Inputs
+	})
+}
+
+// extractInputsFromTxBytes decodes only the inputs of a serialized transaction and stops before
+// the outputs. It returns a TxInvalidError for truncated data or an input count that cannot fit.
+// NOTE(review): assumes the standard (non-extended) layout - confirm for external-store files.
+func extractInputsFromTxBytes(txBytes []byte) ([]*bt.Input, error) {
+	if len(txBytes) < 4 {
+		return nil, errors.NewTxInvalidError("tx too short to contain a version: %d bytes", len(txBytes))
+	}
+	r := bytes.NewReader(txBytes[4:]) // skip the 4-byte version
+
+	var inputCount bt.VarInt
+	if _, err := inputCount.ReadFrom(r); err != nil {
+		return nil, errors.NewTxInvalidError("failed to read input count", err)
+	}
+
+	// Untrusted count: an input needs >= 41 bytes (32 txid + 4 index + 1 script length + 4 sequence).
+	// Anything larger is corrupt and would otherwise panic make() or force a huge allocation.
+	if uint64(inputCount) > uint64(r.Len())/41 {
+		return nil, errors.NewTxInvalidError("input count %d exceeds remaining tx size", uint64(inputCount))
+	}
+
+	inputs := make([]*bt.Input, int(inputCount))
+	for i := range inputs {
+		inputs[i] = &bt.Input{}
+		if _, err := inputs[i].ReadFrom(r); err != nil {
+			return nil, errors.NewTxInvalidError("failed to read input %d", i, err)
+		}
+	}
+	return inputs, nil // outputs and locktime are deliberately left unread
+}
+
 // addAbstractedBins expands the list of field names to include dependent fields.
 // This internal method ensures that when certain abstracted fields are requested,
 // all necessary underlying fields are also retrieved from the database.
@@ -577,11 +614,12 @@ NEXT_BATCH_RECORD:
 
 		items[idx].Data = &meta.Data{}
 
-		// If the tx is external, we need to fetch it from the external store...
+		// If the tx is external and we need the full transaction (for fields.Tx or fields.Inputs),
+		// fetch it from the external store. For TxInpoints only, we use a lighter weight path.
 		var externalTx *bt.Tx
 
 		external, ok := bins[fields.External.String()].(bool)
-		if ok && external {
+		if ok && external && needsFullExternalTx(items[idx].Fields) {
 			if externalTx, err = s.GetTxFromExternalStore(ctx, items[idx].Hash); err != nil {
 				items[idx].Err = err
 
@@ -657,7 +695,13 @@ NEXT_BATCH_RECORD:
 
 			case fields.TxInpoints:
 				if external {
-					items[idx].Data.TxInpoints, err = subtree.NewTxInpointsFromTx(externalTx)
+					if externalTx != nil {
+						// Full transaction was already fetched (needed for fields.Tx or fields.Inputs)
+						items[idx].Data.TxInpoints, err = subtree.NewTxInpointsFromTx(externalTx)
+					} else {
+						// Lightweight path: only extract inputs, skip outputs to save memory
+						items[idx].Data.TxInpoints, err = s.getTxInpointsFromExternalStore(ctx, items[idx].Hash)
+					}
 					if err != nil {
 						items[idx].Err = errors.NewTxInvalidError("could not process tx inpoints", err)
 
@@ -1319,6 +1363,27 @@ func (s *Store) GetTxFromExternalStore(ctx context.Context, previousTxHash chain
 	return s.getExternalTransaction(ctx, previousTxHash)
 }
 
+// getTxInpointsFromExternalStore builds the TxInpoints for txHash from the raw bytes held in
+// the external store. The bytes are fetched in full, but only the inputs are decoded; the
+// result comes from subtree.NewTxInpointsFromInputs. Errors are wrapped with the tx hash.
+func (s *Store) getTxInpointsFromExternalStore(ctx context.Context, txHash chainhash.Hash) (subtree.TxInpoints, error) {
+	// NOTE(review): Start's other return values are discarded - confirm no explicit span end is needed.
+	ctx, _, _ = tracing.Tracer("aerospike").Start(ctx, "getTxInpointsFromExternalStore")
+	none := subtree.TxInpoints{} // empty value handed back alongside any error
+
+	raw, getErr := s.externalStore.Get(ctx, txHash[:], fileformat.FileTypeTx)
+	if getErr != nil {
+		return none, errors.NewStorageError("[getTxInpointsFromExternalStore][%s] could not get tx from external store", txHash.String(), getErr)
+	}
+
+	// Decode just the input section; outputs stay unparsed.
+	txInputs, parseErr := extractInputsFromTxBytes(raw)
+	if parseErr != nil {
+		return none, errors.NewTxInvalidError("[getTxInpointsFromExternalStore][%s] could not extract inputs", txHash.String(), parseErr)
+	}
+
+	return subtree.NewTxInpointsFromInputs(txInputs)
+}
+
 func (s *Store) getExternalTransaction(ctx context.Context, previousTxHash chainhash.Hash) (*bt.Tx, error) {
 	fileType := fileformat.FileTypeTx
 
diff --git a/stores/utxo/aerospike/get_internal_test.go b/stores/utxo/aerospike/get_internal_test.go
@@ -0,0 +1,229 @@
+package aerospike
+
+import (
+	"testing"
+
+	"github.com/bsv-blockchain/go-bt/v2"
+	"github.com/bsv-blockchain/go-bt/v2/bscript"
+	"github.com/bsv-blockchain/go-bt/v2/chainhash"
+	"github.com/bsv-blockchain/teranode/stores/utxo/fields"
+	"github.com/stretchr/testify/require"
+)
+
+// TestNeedsFullExternalTx checks that only fields.Tx and fields.Inputs demand the full external transaction.
+func TestNeedsFullExternalTx(t *testing.T) {
+	cases := []struct {
+		desc       string
+		requested  []fields.FieldName
+		expectFull bool
+	}{
+		{
+			desc:       "TxInpoints only",
+			requested:  []fields.FieldName{fields.TxInpoints},
+			expectFull: false,
+		},
+		{
+			desc:       "Fee only",
+			requested:  []fields.FieldName{fields.Fee},
+			expectFull: false,
+		},
+		{
+			desc:       "Fee and TxInpoints",
+			requested:  []fields.FieldName{fields.Fee, fields.TxInpoints},
+			expectFull: false,
+		},
+		{
+			desc:       "TxInpoints with BlockIDs",
+			requested:  []fields.FieldName{fields.TxInpoints, fields.BlockIDs, fields.SizeInBytes},
+			expectFull: false,
+		},
+		{
+			desc:       "Tx field alone",
+			requested:  []fields.FieldName{fields.Tx},
+			expectFull: true,
+		},
+		{
+			desc:       "Inputs field alone",
+			requested:  []fields.FieldName{fields.Inputs},
+			expectFull: true,
+		},
+		{
+			desc:       "TxInpoints with Tx",
+			requested:  []fields.FieldName{fields.TxInpoints, fields.Tx},
+			expectFull: true,
+		},
+		{
+			desc:       "TxInpoints with Inputs",
+			requested:  []fields.FieldName{fields.TxInpoints, fields.Inputs},
+			expectFull: true,
+		},
+		{
+			desc:       "Empty fields",
+			requested:  []fields.FieldName{},
+			expectFull: false,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			require.Equal(t, tc.expectFull, needsFullExternalTx(tc.requested))
+		})
+	}
+}
+
+func TestExtractInputsFromTxBytes(t *testing.T) {
+	t.Run("single input transaction", func(t *testing.T) {
+		// Create a transaction with a single input
+		tx := bt.NewTx()
+
+		prevTxID, err := chainhash.NewHashFromStr("0102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20")
+		require.NoError(t, err)
+
+		tx.Inputs = append(tx.Inputs, &bt.Input{
+			PreviousTxOutIndex: 42,
+			SequenceNumber:     0xffffffff,
+			UnlockingScript:    bscript.NewFromBytes([]byte{0x00, 0x01, 0x02}),
+		})
+		err = tx.Inputs[0].PreviousTxIDAdd(prevTxID)
+		require.NoError(t, err)
+
+		// Add an output (this should be skipped by extractInputsFromTxBytes)
+		tx.Outputs = append(tx.Outputs, &bt.Output{
+			Satoshis:      1000,
+			LockingScript: bscript.NewFromBytes([]byte{0x76, 0xa9, 0x14}),
+		})
+
+		// Serialize and extract
+		txBytes := tx.Bytes()
+		inputs, err := extractInputsFromTxBytes(txBytes)
+
+		require.NoError(t, err)
+		require.Len(t, inputs, 1)
+		require.Equal(t, uint32(42), inputs[0].PreviousTxOutIndex)
+		require.Equal(t, prevTxID.CloneBytes(), inputs[0].PreviousTxID())
+	})
+
+	t.Run("multiple inputs transaction", func(t *testing.T) {
+		tx := bt.NewTx()
+
+		// Add 3 inputs with different prev tx hashes and output indices
+		for i := 0; i < 3; i++ {
+			prevTxID := &chainhash.Hash{}
+			prevTxID[0] = byte(i + 1)
+
+			input := &bt.Input{
+				PreviousTxOutIndex: uint32(i * 10),
+				SequenceNumber:     0xffffffff,
+				UnlockingScript:    bscript.NewFromBytes([]byte{0x00}),
+			}
+			err := input.PreviousTxIDAdd(prevTxID)
+			require.NoError(t, err)
+			tx.Inputs = append(tx.Inputs, input)
+		}
+
+		// Add outputs that should be skipped
+		for i := 0; i < 5; i++ {
+			tx.Outputs = append(tx.Outputs, &bt.Output{
+				Satoshis:      uint64(i * 1000),
+				LockingScript: bscript.NewFromBytes([]byte{0x76, 0xa9}),
+			})
+		}
+
+		txBytes := tx.Bytes()
+		inputs, err := extractInputsFromTxBytes(txBytes)
+
+		require.NoError(t, err)
+		require.Len(t, inputs, 3)
+
+		for i := 0; i < 3; i++ {
+			require.Equal(t, uint32(i*10), inputs[i].PreviousTxOutIndex)
+			require.Equal(t, byte(i+1), inputs[i].PreviousTxID()[0])
+		}
+	})
+
+	t.Run("transaction with large unlocking script", func(t *testing.T) {
+		tx := bt.NewTx()
+
+		prevTxID := &chainhash.Hash{}
+		prevTxID[0] = 0xAB
+
+		// Create a large unlocking script (simulating a complex script)
+		largeScript := make([]byte, 10000)
+		for i := range largeScript {
+			largeScript[i] = byte(i % 256)
+		}
+
+		input := &bt.Input{
+			PreviousTxOutIndex: 99,
+			SequenceNumber:     0xffffffff,
+			UnlockingScript:    bscript.NewFromBytes(largeScript),
+		}
+		err := input.PreviousTxIDAdd(prevTxID)
+		require.NoError(t, err)
+		tx.Inputs = append(tx.Inputs, input)
+
+		// Add output
+		tx.Outputs = append(tx.Outputs, &bt.Output{
+			Satoshis:      5000,
+			LockingScript: bscript.NewFromBytes([]byte{0x00}),
+		})
+
+		txBytes := tx.Bytes()
+		inputs, err := extractInputsFromTxBytes(txBytes)
+
+		require.NoError(t, err)
+		require.Len(t, inputs, 1)
+		require.Equal(t, uint32(99), inputs[0].PreviousTxOutIndex)
+		require.Equal(t, byte(0xAB), inputs[0].PreviousTxID()[0])
+	})
+
+	t.Run("invalid bytes - too short", func(t *testing.T) {
+		_, err := extractInputsFromTxBytes([]byte{0x01, 0x02})
+		require.Error(t, err)
+	})
+
+	t.Run("extracted inputs match full parse", func(t *testing.T) {
+		// Create a realistic transaction
+		tx := bt.NewTx()
+
+		for i := 0; i < 5; i++ {
+			prevTxID := &chainhash.Hash{}
+			for j := 0; j < 32; j++ {
+				prevTxID[j] = byte((i + j) % 256)
+			}
+
+			input := &bt.Input{
+				PreviousTxOutIndex: uint32(i),
+				SequenceNumber:     0xfffffffe,
+				UnlockingScript:    bscript.NewFromBytes([]byte{0x48, 0x30, 0x45}),
+			}
+			err := input.PreviousTxIDAdd(prevTxID)
+			require.NoError(t, err)
+			tx.Inputs = append(tx.Inputs, input)
+		}
+
+		for i := 0; i < 3; i++ {
+			tx.Outputs = append(tx.Outputs, &bt.Output{
+				Satoshis:      uint64(i * 10000),
+				LockingScript: bscript.NewFromBytes([]byte{0x76, 0xa9, 0x14}),
+			})
+		}
+
+		txBytes := tx.Bytes()
+
+		// Parse using our function
+		extractedInputs, err := extractInputsFromTxBytes(txBytes)
+		require.NoError(t, err)
+
+		// Parse using full deserialization
+		fullTx, err := bt.NewTxFromBytes(txBytes)
+		require.NoError(t, err)
+
+		// Compare
+		require.Len(t, extractedInputs, len(fullTx.Inputs))
+		for i := range extractedInputs {
+			require.Equal(t, fullTx.Inputs[i].PreviousTxID(), extractedInputs[i].PreviousTxID())
+			require.Equal(t, fullTx.Inputs[i].PreviousTxOutIndex, extractedInputs[i].PreviousTxOutIndex)
+		}
+	})
+}