Skip to content

Commit a6db74d

Browse files
committed
Prevent full external tx reading and parsing when only TxInpoints are needed
1 parent 715e3b1 commit a6db74d

File tree

2 files changed

+297
-3
lines changed

2 files changed

+297
-3
lines changed

stores/utxo/aerospike/get.go

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,43 @@ func (s *Store) getTxFromBins(bins aerospike.BinMap) (tx *bt.Tx, err error) {
430430
return tx, nil
431431
}
432432

433+
// needsFullExternalTx returns true if any requested field requires the full external transaction.
434+
// This is used to optimize memory usage by avoiding full transaction deserialization when only
435+
// TxInpoints are needed.
436+
func needsFullExternalTx(requestedFields []fields.FieldName) bool {
437+
return slices.Contains(requestedFields, fields.Tx) ||
438+
slices.Contains(requestedFields, fields.Inputs)
439+
}
440+
441+
// extractInputsFromTxBytes reads only the inputs from raw transaction bytes,
442+
// skipping output parsing to reduce memory allocation. This is significantly more
443+
// efficient than deserializing the entire transaction when only input data is needed.
444+
func extractInputsFromTxBytes(txBytes []byte) ([]*bt.Input, error) {
445+
r := bytes.NewReader(txBytes)
446+
447+
// Skip version (4 bytes)
448+
if _, err := r.Seek(4, 1); err != nil {
449+
return nil, errors.NewTxInvalidError("failed to skip version bytes", err)
450+
}
451+
452+
// Read input count
453+
var inputCount bt.VarInt
454+
if _, err := inputCount.ReadFrom(r); err != nil {
455+
return nil, errors.NewTxInvalidError("failed to read input count", err)
456+
}
457+
458+
inputs := make([]*bt.Input, int(inputCount))
459+
for i := range inputs {
460+
inputs[i] = &bt.Input{}
461+
if _, err := inputs[i].ReadFrom(r); err != nil {
462+
return nil, errors.NewTxInvalidError("failed to read input %d", i, err)
463+
}
464+
}
465+
466+
// Don't read outputs - return early to save memory
467+
return inputs, nil
468+
}
469+
433470
// addAbstractedBins expands the list of field names to include dependent fields.
434471
// This internal method ensures that when certain abstracted fields are requested,
435472
// all necessary underlying fields are also retrieved from the database.
@@ -577,11 +614,12 @@ NEXT_BATCH_RECORD:
577614

578615
items[idx].Data = &meta.Data{}
579616

580-
// If the tx is external, we need to fetch it from the external store...
617+
// If the tx is external and we need the full transaction (for fields.Tx or fields.Inputs),
618+
// fetch it from the external store. For TxInpoints only, we use a lighter weight path.
581619
var externalTx *bt.Tx
582620

583621
external, ok := bins[fields.External.String()].(bool)
584-
if ok && external {
622+
if ok && external && needsFullExternalTx(items[idx].Fields) {
585623
if externalTx, err = s.GetTxFromExternalStore(ctx, items[idx].Hash); err != nil {
586624
items[idx].Err = err
587625

@@ -657,7 +695,13 @@ NEXT_BATCH_RECORD:
657695

658696
case fields.TxInpoints:
659697
if external {
660-
items[idx].Data.TxInpoints, err = subtree.NewTxInpointsFromTx(externalTx)
698+
if externalTx != nil {
699+
// Full transaction was already fetched (needed for fields.Tx or fields.Inputs)
700+
items[idx].Data.TxInpoints, err = subtree.NewTxInpointsFromTx(externalTx)
701+
} else {
702+
// Lightweight path: only extract inputs, skip outputs to save memory
703+
items[idx].Data.TxInpoints, err = s.getTxInpointsFromExternalStore(ctx, items[idx].Hash)
704+
}
661705
if err != nil {
662706
items[idx].Err = errors.NewTxInvalidError("could not process tx inpoints", err)
663707

@@ -1319,6 +1363,27 @@ func (s *Store) GetTxFromExternalStore(ctx context.Context, previousTxHash chain
13191363
return s.getExternalTransaction(ctx, previousTxHash)
13201364
}
13211365

1366+
// getTxInpointsFromExternalStore fetches only the TxInpoints from an external transaction
1367+
// without deserializing the full transaction. This is a memory optimization that avoids
1368+
// parsing outputs when only input references (prev tx hash + output index) are needed.
1369+
func (s *Store) getTxInpointsFromExternalStore(ctx context.Context, txHash chainhash.Hash) (subtree.TxInpoints, error) {
1370+
ctx, _, _ = tracing.Tracer("aerospike").Start(ctx, "getTxInpointsFromExternalStore")
1371+
1372+
// Get raw transaction bytes from external store
1373+
txBytes, err := s.externalStore.Get(ctx, txHash[:], fileformat.FileTypeTx)
1374+
if err != nil {
1375+
return subtree.TxInpoints{}, errors.NewStorageError("[getTxInpointsFromExternalStore][%s] could not get tx from external store", txHash.String(), err)
1376+
}
1377+
1378+
// Extract only inputs, skip outputs
1379+
inputs, err := extractInputsFromTxBytes(txBytes)
1380+
if err != nil {
1381+
return subtree.TxInpoints{}, errors.NewTxInvalidError("[getTxInpointsFromExternalStore][%s] could not extract inputs", txHash.String(), err)
1382+
}
1383+
1384+
return subtree.NewTxInpointsFromInputs(inputs)
1385+
}
1386+
13221387
func (s *Store) getExternalTransaction(ctx context.Context, previousTxHash chainhash.Hash) (*bt.Tx, error) {
13231388
fileType := fileformat.FileTypeTx
13241389

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
package aerospike
2+
3+
import (
4+
"testing"
5+
6+
"github.com/bsv-blockchain/go-bt/v2"
7+
"github.com/bsv-blockchain/go-bt/v2/bscript"
8+
"github.com/bsv-blockchain/go-bt/v2/chainhash"
9+
"github.com/bsv-blockchain/teranode/stores/utxo/fields"
10+
"github.com/stretchr/testify/require"
11+
)
12+
13+
func TestNeedsFullExternalTx(t *testing.T) {
14+
tests := []struct {
15+
name string
16+
fields []fields.FieldName
17+
want bool
18+
}{
19+
{
20+
name: "TxInpoints only",
21+
fields: []fields.FieldName{fields.TxInpoints},
22+
want: false,
23+
},
24+
{
25+
name: "Fee only",
26+
fields: []fields.FieldName{fields.Fee},
27+
want: false,
28+
},
29+
{
30+
name: "Fee and TxInpoints",
31+
fields: []fields.FieldName{fields.Fee, fields.TxInpoints},
32+
want: false,
33+
},
34+
{
35+
name: "TxInpoints with BlockIDs",
36+
fields: []fields.FieldName{fields.TxInpoints, fields.BlockIDs, fields.SizeInBytes},
37+
want: false,
38+
},
39+
{
40+
name: "Tx field alone",
41+
fields: []fields.FieldName{fields.Tx},
42+
want: true,
43+
},
44+
{
45+
name: "Inputs field alone",
46+
fields: []fields.FieldName{fields.Inputs},
47+
want: true,
48+
},
49+
{
50+
name: "TxInpoints with Tx",
51+
fields: []fields.FieldName{fields.TxInpoints, fields.Tx},
52+
want: true,
53+
},
54+
{
55+
name: "TxInpoints with Inputs",
56+
fields: []fields.FieldName{fields.TxInpoints, fields.Inputs},
57+
want: true,
58+
},
59+
{
60+
name: "Empty fields",
61+
fields: []fields.FieldName{},
62+
want: false,
63+
},
64+
}
65+
66+
for _, tt := range tests {
67+
t.Run(tt.name, func(t *testing.T) {
68+
got := needsFullExternalTx(tt.fields)
69+
require.Equal(t, tt.want, got)
70+
})
71+
}
72+
}
73+
74+
func TestExtractInputsFromTxBytes(t *testing.T) {
75+
t.Run("single input transaction", func(t *testing.T) {
76+
// Create a transaction with a single input
77+
tx := bt.NewTx()
78+
79+
prevTxID, err := chainhash.NewHashFromStr("0102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20")
80+
require.NoError(t, err)
81+
82+
tx.Inputs = append(tx.Inputs, &bt.Input{
83+
PreviousTxOutIndex: 42,
84+
SequenceNumber: 0xffffffff,
85+
UnlockingScript: bscript.NewFromBytes([]byte{0x00, 0x01, 0x02}),
86+
})
87+
err = tx.Inputs[0].PreviousTxIDAdd(prevTxID)
88+
require.NoError(t, err)
89+
90+
// Add an output (this should be skipped by extractInputsFromTxBytes)
91+
tx.Outputs = append(tx.Outputs, &bt.Output{
92+
Satoshis: 1000,
93+
LockingScript: bscript.NewFromBytes([]byte{0x76, 0xa9, 0x14}),
94+
})
95+
96+
// Serialize and extract
97+
txBytes := tx.Bytes()
98+
inputs, err := extractInputsFromTxBytes(txBytes)
99+
100+
require.NoError(t, err)
101+
require.Len(t, inputs, 1)
102+
require.Equal(t, uint32(42), inputs[0].PreviousTxOutIndex)
103+
require.Equal(t, prevTxID.CloneBytes(), inputs[0].PreviousTxID())
104+
})
105+
106+
t.Run("multiple inputs transaction", func(t *testing.T) {
107+
tx := bt.NewTx()
108+
109+
// Add 3 inputs with different prev tx hashes and output indices
110+
for i := 0; i < 3; i++ {
111+
prevTxID := &chainhash.Hash{}
112+
prevTxID[0] = byte(i + 1)
113+
114+
input := &bt.Input{
115+
PreviousTxOutIndex: uint32(i * 10),
116+
SequenceNumber: 0xffffffff,
117+
UnlockingScript: bscript.NewFromBytes([]byte{0x00}),
118+
}
119+
err := input.PreviousTxIDAdd(prevTxID)
120+
require.NoError(t, err)
121+
tx.Inputs = append(tx.Inputs, input)
122+
}
123+
124+
// Add outputs that should be skipped
125+
for i := 0; i < 5; i++ {
126+
tx.Outputs = append(tx.Outputs, &bt.Output{
127+
Satoshis: uint64(i * 1000),
128+
LockingScript: bscript.NewFromBytes([]byte{0x76, 0xa9}),
129+
})
130+
}
131+
132+
txBytes := tx.Bytes()
133+
inputs, err := extractInputsFromTxBytes(txBytes)
134+
135+
require.NoError(t, err)
136+
require.Len(t, inputs, 3)
137+
138+
for i := 0; i < 3; i++ {
139+
require.Equal(t, uint32(i*10), inputs[i].PreviousTxOutIndex)
140+
require.Equal(t, byte(i+1), inputs[i].PreviousTxID()[0])
141+
}
142+
})
143+
144+
t.Run("transaction with large unlocking script", func(t *testing.T) {
145+
tx := bt.NewTx()
146+
147+
prevTxID := &chainhash.Hash{}
148+
prevTxID[0] = 0xAB
149+
150+
// Create a large unlocking script (simulating a complex script)
151+
largeScript := make([]byte, 10000)
152+
for i := range largeScript {
153+
largeScript[i] = byte(i % 256)
154+
}
155+
156+
input := &bt.Input{
157+
PreviousTxOutIndex: 99,
158+
SequenceNumber: 0xffffffff,
159+
UnlockingScript: bscript.NewFromBytes(largeScript),
160+
}
161+
err := input.PreviousTxIDAdd(prevTxID)
162+
require.NoError(t, err)
163+
tx.Inputs = append(tx.Inputs, input)
164+
165+
// Add output
166+
tx.Outputs = append(tx.Outputs, &bt.Output{
167+
Satoshis: 5000,
168+
LockingScript: bscript.NewFromBytes([]byte{0x00}),
169+
})
170+
171+
txBytes := tx.Bytes()
172+
inputs, err := extractInputsFromTxBytes(txBytes)
173+
174+
require.NoError(t, err)
175+
require.Len(t, inputs, 1)
176+
require.Equal(t, uint32(99), inputs[0].PreviousTxOutIndex)
177+
require.Equal(t, byte(0xAB), inputs[0].PreviousTxID()[0])
178+
})
179+
180+
t.Run("invalid bytes - too short", func(t *testing.T) {
181+
_, err := extractInputsFromTxBytes([]byte{0x01, 0x02})
182+
require.Error(t, err)
183+
})
184+
185+
t.Run("extracted inputs match full parse", func(t *testing.T) {
186+
// Create a realistic transaction
187+
tx := bt.NewTx()
188+
189+
for i := 0; i < 5; i++ {
190+
prevTxID := &chainhash.Hash{}
191+
for j := 0; j < 32; j++ {
192+
prevTxID[j] = byte((i + j) % 256)
193+
}
194+
195+
input := &bt.Input{
196+
PreviousTxOutIndex: uint32(i),
197+
SequenceNumber: 0xfffffffe,
198+
UnlockingScript: bscript.NewFromBytes([]byte{0x48, 0x30, 0x45}),
199+
}
200+
err := input.PreviousTxIDAdd(prevTxID)
201+
require.NoError(t, err)
202+
tx.Inputs = append(tx.Inputs, input)
203+
}
204+
205+
for i := 0; i < 3; i++ {
206+
tx.Outputs = append(tx.Outputs, &bt.Output{
207+
Satoshis: uint64(i * 10000),
208+
LockingScript: bscript.NewFromBytes([]byte{0x76, 0xa9, 0x14}),
209+
})
210+
}
211+
212+
txBytes := tx.Bytes()
213+
214+
// Parse using our function
215+
extractedInputs, err := extractInputsFromTxBytes(txBytes)
216+
require.NoError(t, err)
217+
218+
// Parse using full deserialization
219+
fullTx, err := bt.NewTxFromBytes(txBytes)
220+
require.NoError(t, err)
221+
222+
// Compare
223+
require.Len(t, extractedInputs, len(fullTx.Inputs))
224+
for i := range extractedInputs {
225+
require.Equal(t, fullTx.Inputs[i].PreviousTxID(), extractedInputs[i].PreviousTxID())
226+
require.Equal(t, fullTx.Inputs[i].PreviousTxOutIndex, extractedInputs[i].PreviousTxOutIndex)
227+
}
228+
})
229+
}

0 commit comments

Comments
 (0)