From f32a30ac8b8a0d76983872310ffb0a5834bd2adc Mon Sep 17 00:00:00 2001
From: Nedyalko Dyakov
Date: Sat, 13 Dec 2025 12:31:06 +0200
Subject: [PATCH 1/2] fix(queuedNewConn): protect against nil context

---
 internal/pool/pool.go           |  6 ++++
 internal/pool/pool_test.go      | 58 +++++++++++++++++++++++++++++++++
 internal/pool/want_conn_test.go | 53 ++++++++++++++++++++++++++++++
 3 files changed, 117 insertions(+)

diff --git a/internal/pool/pool.go b/internal/pool/pool.go
index d757d1f4f..77cecc646 100644
--- a/internal/pool/pool.go
+++ b/internal/pool/pool.go
@@ -321,6 +321,12 @@ func (p *ConnPool) newConn(ctx context.Context, pooled bool) (*Conn, error) {
 		return nil, ErrPoolExhausted
 	}
 
+	// Protect against nil context due to race condition in queuedNewConn
+	// where the context can be set to nil after timeout/cancellation
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
 	dialCtx, cancel := context.WithTimeout(ctx, p.cfg.DialTimeout)
 	defer cancel()
 	cn, err := p.dialConn(dialCtx, pooled)
diff --git a/internal/pool/pool_test.go b/internal/pool/pool_test.go
index 8e2f0394a..757057b37 100644
--- a/internal/pool/pool_test.go
+++ b/internal/pool/pool_test.go
@@ -1037,6 +1037,64 @@ var _ = Describe("queuedNewConn", func() {
 		testPool.Put(ctx, reqBConn)
 		Eventually(func() int { return testPool.QueueLen() }, "600ms").Should(Equal(0))
 	})
+
+	// Test for race condition where nil context can be passed to newConn
+	// This reproduces the issue reported in GitHub where queuedNewConn panics
+	// with "cannot create context from nil parent"
+	It("should handle nil context race condition in queuedNewConn", func() {
+		// Create a pool with very short timeouts to trigger the race condition
+		testPool := pool.NewConnPool(&pool.Options{
+			Dialer: func(ctx context.Context) (net.Conn, error) {
+				// Add a small delay to increase chance of race condition
+				time.Sleep(50 * time.Millisecond)
+				return dummyDialer(ctx)
+			},
+			PoolSize:           int32(10),
+			MaxConcurrentDials: 10,
+			PoolTimeout:        10 * time.Millisecond, // Very short timeout
+			DialTimeout:        100 * time.Millisecond,
+			ConnMaxIdleTime:    time.Millisecond,
+		})
+		defer testPool.Close()
+
+		// Try to trigger the race condition by making many concurrent requests
+		// with short timeouts
+		const numGoroutines = 50
+		var wg sync.WaitGroup
+		errors := make(chan error, numGoroutines)
+
+		for i := 0; i < numGoroutines; i++ {
+			wg.Add(1)
+			go func() {
+				defer GinkgoRecover()
+				defer wg.Done()
+
+				// Use a very short context timeout to trigger the race
+				ctx, cancel := context.WithTimeout(context.Background(), 5*time.Millisecond)
+				defer cancel()
+
+				_, err := testPool.Get(ctx)
+				if err != nil {
+					// We expect timeout errors, but not panics
+					errors <- err
+				}
+			}()
+		}
+
+		wg.Wait()
+		close(errors)
+
+		// Check that we got timeout errors (expected) but no panics
+		// The test passes if it doesn't panic
+		timeoutCount := 0
+		for err := range errors {
+			if err == context.DeadlineExceeded || err == pool.ErrPoolTimeout {
+				timeoutCount++
+			}
+		}
+
+		// We should have at least some timeouts due to the short timeout
+		Expect(timeoutCount).To(BeNumerically(">", 0))
+	})
 })
 
 func init() {
diff --git a/internal/pool/want_conn_test.go b/internal/pool/want_conn_test.go
index 9526f70cc..09e8627bb 100644
--- a/internal/pool/want_conn_test.go
+++ b/internal/pool/want_conn_test.go
@@ -442,3 +442,56 @@ func BenchmarkWantConnQueue_EnqueueDequeue(b *testing.B) {
 		q.dequeue()
 	}
 }
+
+// TestWantConn_RaceConditionNilContext tests the race condition where
+// getCtxForDial can return nil after the context is cancelled.
+// This test verifies that the fix in newConn handles nil context gracefully.
+func TestWantConn_RaceConditionNilContext(t *testing.T) {
+	// This test simulates the race condition described in the issue:
+	// 1. Main goroutine creates a wantConn with a context
+	// 2. Background goroutine starts but hasn't called getCtxForDial yet
+	// 3. Main goroutine times out and calls cancel(), setting w.ctx to nil
+	// 4. Background goroutine calls getCtxForDial() and gets nil
+	// 5. Background goroutine calls newConn(nil, true) which should not panic
+
+	dialCtx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
+	defer cancel()
+
+	w := &wantConn{
+		ctx:       dialCtx,
+		cancelCtx: cancel,
+		result:    make(chan wantConnResult, 1),
+	}
+
+	// Simulate the race condition by canceling the context
+	// and then trying to get it
+	var wg sync.WaitGroup
+	wg.Add(1)
+
+	go func() {
+		defer wg.Done()
+		// Small delay to ensure cancel happens first
+		time.Sleep(10 * time.Millisecond)
+
+		// This should return nil after cancel
+		ctx := w.getCtxForDial()
+
+		// Verify that we got nil context
+		if ctx != nil {
+			t.Errorf("Expected nil context after cancel, got %v", ctx)
+		}
+	}()
+
+	// Cancel the context immediately
+	w.cancel()
+
+	wg.Wait()
+
+	// Verify the wantConn state
+	if !w.done {
+		t.Error("wantConn should be marked as done after cancel")
+	}
+	if w.ctx != nil {
+		t.Error("wantConn.ctx should be nil after cancel")
+	}
+}

From b9fc4c089ade185a227df775d06d19b3e7cdd313 Mon Sep 17 00:00:00 2001
From: Nedyalko Dyakov
Date: Mon, 15 Dec 2025 10:10:15 +0200
Subject: [PATCH 2/2] fix flaky test

---
 commands_test.go | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/commands_test.go b/commands_test.go
index c5fa40e60..57518c3df 100644
--- a/commands_test.go
+++ b/commands_test.go
@@ -8870,11 +8870,15 @@ var _ = Describe("Commands", func() {
 		It("returns latencies", func() {
 			const key = "latency-monitor-threshold"
 
+			// reset all latencies first to ensure clean state
+			err := client.LatencyReset(ctx).Err()
+			Expect(err).NotTo(HaveOccurred())
+
 			old := client.ConfigGet(ctx, key).Val()
 			client.ConfigSet(ctx, key, "1")
 			defer client.ConfigSet(ctx, key, old[key])
 
-			err := client.Do(ctx, "DEBUG", "SLEEP", 0.01).Err()
+			err = client.Do(ctx, "DEBUG", "SLEEP", 0.01).Err()
 			Expect(err).NotTo(HaveOccurred())
 
 			result, err := client.Latency(ctx).Result()
@@ -8921,6 +8925,10 @@ var _ = Describe("Commands", func() {
 		It("reset latencies by add event name args", func() {
 			const key = "latency-monitor-threshold"
 
+			// reset all latencies first to ensure clean state
+			err := client.LatencyReset(ctx).Err()
+			Expect(err).NotTo(HaveOccurred())
+
 			old := client.ConfigGet(ctx, key).Val()
 			client.ConfigSet(ctx, key, "1")
 			defer client.ConfigSet(ctx, key, old[key])
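
A minimal, self-contained sketch (not part of either patch) of the race the guard in newConn defends against. The wantConnSketch/newConnSketch names, the simplified mutex-guarded context field, and the timings are illustrative stand-ins for the pool internals, not the actual go-redis types:

// race_sketch.go -- illustrative only; types and names are simplified
// stand-ins for internal/pool's wantConn, getCtxForDial, and newConn.
package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// wantConnSketch mimics the relevant part of wantConn: a queued dial
// request whose context is dropped once the waiter gives up.
type wantConnSketch struct {
	mu  sync.Mutex
	ctx context.Context
}

// getCtxForDial returns the request context, or nil if the waiter has
// already cancelled -- this nil is what used to reach newConn.
func (w *wantConnSketch) getCtxForDial() context.Context {
	w.mu.Lock()
	defer w.mu.Unlock()
	return w.ctx
}

// cancel clears the context, as the pool does when Get times out before
// the queued background dial has started.
func (w *wantConnSketch) cancel() {
	w.mu.Lock()
	defer w.mu.Unlock()
	w.ctx = nil
}

// newConnSketch shows the guard added by the patch: fall back to
// context.Background() instead of handing a nil parent to
// context.WithTimeout, which panics with
// "cannot create context from nil parent".
func newConnSketch(ctx context.Context, dialTimeout time.Duration) error {
	if ctx == nil {
		ctx = context.Background()
	}
	dialCtx, cancel := context.WithTimeout(ctx, dialTimeout)
	defer cancel()
	<-dialCtx.Done() // stand-in for the actual dial
	return dialCtx.Err()
}

func main() {
	w := &wantConnSketch{ctx: context.TODO()}
	w.cancel() // the waiter gives up first ...
	// ... then the queued dial goroutine runs with a nil context.
	err := newConnSketch(w.getCtxForDial(), 10*time.Millisecond)
	fmt.Println("dial finished without panic:", err)
}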