From a01eac0a1a22be5326f7db573b7792279f82a4b0 Mon Sep 17 00:00:00 2001 From: Nia Waldvogel Date: Wed, 3 Dec 2025 15:49:12 -0500 Subject: [PATCH] internal/task (threads): save stack bounds instead of scanning under a lock In order to scan stacks, the GC preempts all other threads and has them scan their own stack. This is somewhat expensive since all of these threads have to fight over a single lock. Instead, save the stack bounds and let the GC thread perform the scan. This also fixes a few other bugs I ran into: 1. The GC starts scanning before the world stops. This can cause it to miss some objects (and mistakenly free them) if memory is modified while stopping. 2. The GC does not wait for threads to resume. This can cause notifications to be misinterpreted due to signal nesting if the GC is re-run before all threads wake. --- src/internal/task/task_threads.go | 166 ++++++++++++++++++------------ src/runtime/gc_boehm.go | 26 +---- 2 files changed, 103 insertions(+), 89 deletions(-) diff --git a/src/internal/task/task_threads.go b/src/internal/task/task_threads.go index 48d647f16d..2085fa979f 100644 --- a/src/internal/task/task_threads.go +++ b/src/internal/task/task_threads.go @@ -23,16 +23,15 @@ type state struct { // is needed to be able to scan the stack. stackTop uintptr + // Lowest address of the stack. + // This is populated when the thread is stopped by the GC. + stackBottom uintptr + // Next task in the activeTasks queue. QueueNext *Task // Semaphore to pause/resume the thread atomically. pauseSem Semaphore - - // Semaphore used for stack scanning. - // We can't reuse pauseSem here since the thread might have been paused for - // other reasons (for example, because it was waiting on a channel). - gcSem Semaphore } // Goroutine counter, starting at 0 for the main goroutine. @@ -96,6 +95,9 @@ func (t *Task) Resume() { t.state.pauseSem.Post() } +// otherGoroutines is the total number of live goroutines minus one. +var otherGoroutines uint32 + // Start a new OS thread. func start(fn uintptr, args unsafe.Pointer, stackSize uintptr) { t := &Task{} @@ -115,6 +117,7 @@ func start(fn uintptr, args unsafe.Pointer, stackSize uintptr) { } t.state.QueueNext = activeTasks activeTasks = t + otherGoroutines++ activeTaskLock.Unlock() } @@ -135,6 +138,7 @@ func taskExited(t *Task) { break } } + otherGoroutines-- activeTaskLock.Unlock() // Sanity check. @@ -143,9 +147,42 @@ func taskExited(t *Task) { } } -// Futex to wait on until all tasks have finished scanning the stack. -// This is basically a sync.WaitGroup. -var scanDoneFutex Futex +// scanWaitGroup is used to wait on until all threads have finished the current state transition. +var scanWaitGroup waitGroup + +type waitGroup struct { + f Futex +} + +func initWaitGroup(n uint32) waitGroup { + var wg waitGroup + wg.f.Store(n) + return wg +} + +func (wg *waitGroup) done() { + if wg.f.Add(^uint32(0)) == 0 { + wg.f.WakeAll() + } +} + +func (wg *waitGroup) wait() { + for { + val := wg.f.Load() + if val == 0 { + return + } + wg.f.Wait(val) + } +} + +// gcState is used to track and notify threads when the GC is stopping/resuming. +var gcState Futex + +const ( + gcStateResumed = iota + gcStateStopped +) // GC scan phase. Because we need to stop the world while scanning, this kinda // needs to be done in the tasks package. @@ -155,65 +192,71 @@ var scanDoneFutex Futex func GCStopWorldAndScan() { current := Current() - // Don't allow new goroutines to be started while pausing/resuming threads - // in the stop-the-world phase. - activeTaskLock.Lock() + // NOTE: This does not need to be atomic. + if gcState.Load() == gcStateResumed { + // Don't allow new goroutines to be started while pausing/resuming threads + // in the stop-the-world phase. + activeTaskLock.Lock() - // Pause all other threads. - numOtherThreads := uint32(0) - for t := activeTasks; t != nil; t = t.state.QueueNext { - if t != current { - numOtherThreads++ - tinygo_task_send_gc_signal(t.state.thread) - } - } + // Wait for threads to finish resuming. + scanWaitGroup.wait() - // Store the number of threads to wait for in the futex. - // This is the equivalent of doing an initial wg.Add(numOtherThreads). - scanDoneFutex.Store(numOtherThreads) + // Change the gc state to stopped. + // NOTE: This does not need to be atomic. + gcState.Store(gcStateStopped) - // Scan the current stack, and all current registers. - scanCurrentStack() + // Set the number of threads to wait for. + scanWaitGroup = initWaitGroup(otherGoroutines) - // Wake each paused thread for the first time so it will scan the stack. - for t := activeTasks; t != nil; t = t.state.QueueNext { - if t != current { - t.state.gcSem.Post() + // Pause all other threads. + for t := activeTasks; t != nil; t = t.state.QueueNext { + if t != current { + tinygo_task_send_gc_signal(t.state.thread) + } } + + // Wait for the threads to finish stopping. + scanWaitGroup.wait() } - // Wait until all threads have finished scanning their stack. - // This is the equivalent of wg.Wait() - for { - val := scanDoneFutex.Load() - if val == 0 { - break + // Scan other thread stacks. + for t := activeTasks; t != nil; t = t.state.QueueNext { + if t != current { + markRoots(t.state.stackBottom, t.state.stackTop) } - scanDoneFutex.Wait(val) } + // Scan the current stack, and all current registers. + scanCurrentStack() + // Scan all globals (implemented in the runtime). gcScanGlobals() } // After the GC is done scanning, resume all other threads. -// -// This must only be called after a GCStopWorldAndScan call. func GCResumeWorld() { - current := Current() - - // Wake each paused thread for the second time, so they will resume normal - // operation. - for t := activeTasks; t != nil; t = t.state.QueueNext { - if t != current { - t.state.gcSem.Post() - } + // NOTE: This does not need to be atomic. + if gcState.Load() == gcStateResumed { + // This is already resumed. + return } + // Set the wait group to track resume progress. + scanWaitGroup = initWaitGroup(otherGoroutines) + + // Set the state to resumed. + gcState.Store(gcStateResumed) + + // Wake all of the stopped threads. + gcState.WakeAll() + // Allow goroutines to start and exit again. activeTaskLock.Unlock() } +//go:linkname markRoots runtime.markRoots +func markRoots(start, end uintptr) + // Scan globals, implemented in the runtime package. func gcScanGlobals() @@ -221,34 +264,27 @@ var stackScanLock PMutex //export tinygo_task_gc_pause func tingyo_task_gc_pause(sig int32) { - // Wait until we get the signal to start scanning the stack. - Current().state.gcSem.Wait() - - // Scan the thread stack. - // Only scan a single thread stack at a time, because the GC marking phase - // doesn't support parallelism. - // TODO: it may be possible to call markRoots directly (without saving - // registers) since we are in a signal handler that already saved a bunch of - // registers. This is an optimization left for a future time. - stackScanLock.Lock() - scanCurrentStack() - stackScanLock.Unlock() + // Write the entrty stack pointer to the state. + Current().state.stackBottom = uintptr(stacksave()) + + // Notify the GC that we are stopped. + scanWaitGroup.done() - // Equivalent of wg.Done(): subtract one from the futex and if the result is - // 0 (meaning we were the last in the waitgroup), wake the waiting thread. - n := uint32(1) - if scanDoneFutex.Add(-n) == 0 { - scanDoneFutex.Wake() + // Wait for the GC to resume. + for gcState.Load() == gcStateStopped { + gcState.Wait(gcStateStopped) } - // Wait until we get the signal we can resume normally (after the mark phase - // has finished). - Current().state.gcSem.Wait() + // Notify the GC that we have resumed. + scanWaitGroup.done() } //go:export tinygo_scanCurrentStack func scanCurrentStack() +//go:linkname stacksave runtime.stacksave +func stacksave() unsafe.Pointer + // Return the highest address of the current stack. func StackTop() uintptr { return Current().state.stackTop diff --git a/src/runtime/gc_boehm.go b/src/runtime/gc_boehm.go index 08d16760c0..3b5d2ac067 100644 --- a/src/runtime/gc_boehm.go +++ b/src/runtime/gc_boehm.go @@ -31,10 +31,6 @@ var zeroSizedAlloc uint8 var gcLock task.PMutex -// Normally false, set to true during a GC scan when all other threads get -// paused. -var needsResumeWorld bool - func initHeap() { libgc_init() @@ -48,20 +44,8 @@ func gcInit() //export tinygo_runtime_bdwgc_callback func gcCallback() { - if hasParallelism && needsResumeWorld { - // Should never happen, check for it anyway. - runtimePanic("gc: world already stopped") - } - // Mark globals and all stacks, and stop the world if we're using threading. gcMarkReachable() - - // If we use a scheduler with parallelism (the threads scheduler for - // example), we need to call gcResumeWorld() after scanning has finished. - if hasParallelism { - // Note that we need to resume the world after finishing the GC call. - needsResumeWorld = true - } } func markRoots(start, end uintptr) { @@ -87,7 +71,6 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { } gcLock.Lock() - needsResumeWorld = false var ptr unsafe.Pointer if layout == gclayout.NoPtrs.AsPtr() { // This object is entirely pointer free, for example make([]int, ...). @@ -104,9 +87,7 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { // Memory returned from libgc_malloc has already been zeroed, so nothing // to do here. } - if needsResumeWorld { - gcResumeWorld() - } + gcResumeWorld() gcLock.Unlock() if ptr == nil { runtimePanic("gc: out of memory") @@ -121,11 +102,8 @@ func free(ptr unsafe.Pointer) { func GC() { gcLock.Lock() - needsResumeWorld = false libgc_gcollect() - if needsResumeWorld { - gcResumeWorld() - } + gcResumeWorld() gcLock.Unlock() }