Skip to content

Commit 9404bb8

Browse files
niaowdeadprogram
authored andcommitted
internal/task (threads): save stack bounds instead of scanning under a lock
In order to scan stacks, the GC preempts all other threads and has them scan their own stack. This is somewhat expensive since all of these threads have to fight over a single lock. Instead, save the stack bounds and let the GC thread perform the scan. This also fixes a few other bugs I ran into: 1. The GC starts scanning before the world stops. This can cause it to miss some objects (and mistakenly free them) if memory is modified while stopping. 2. The GC does not wait for threads to resume. This can cause notifications to be misinterpreted due to signal nesting if the GC is re-run before all threads wake.
1 parent 20e22d4 commit 9404bb8

File tree

2 files changed

+103
-89
lines changed

2 files changed

+103
-89
lines changed

src/internal/task/task_threads.go

Lines changed: 101 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,15 @@ type state struct {
2323
// is needed to be able to scan the stack.
2424
stackTop uintptr
2525

26+
// Lowest address of the stack.
27+
// This is populated when the thread is stopped by the GC.
28+
stackBottom uintptr
29+
2630
// Next task in the activeTasks queue.
2731
QueueNext *Task
2832

2933
// Semaphore to pause/resume the thread atomically.
3034
pauseSem Semaphore
31-
32-
// Semaphore used for stack scanning.
33-
// We can't reuse pauseSem here since the thread might have been paused for
34-
// other reasons (for example, because it was waiting on a channel).
35-
gcSem Semaphore
3635
}
3736

3837
// Goroutine counter, starting at 0 for the main goroutine.
@@ -96,6 +95,9 @@ func (t *Task) Resume() {
9695
t.state.pauseSem.Post()
9796
}
9897

98+
// otherGoroutines is the total number of live goroutines minus one.
99+
var otherGoroutines uint32
100+
99101
// Start a new OS thread.
100102
func start(fn uintptr, args unsafe.Pointer, stackSize uintptr) {
101103
t := &Task{}
@@ -115,6 +117,7 @@ func start(fn uintptr, args unsafe.Pointer, stackSize uintptr) {
115117
}
116118
t.state.QueueNext = activeTasks
117119
activeTasks = t
120+
otherGoroutines++
118121
activeTaskLock.Unlock()
119122
}
120123

@@ -135,6 +138,7 @@ func taskExited(t *Task) {
135138
break
136139
}
137140
}
141+
otherGoroutines--
138142
activeTaskLock.Unlock()
139143

140144
// Sanity check.
@@ -143,9 +147,42 @@ func taskExited(t *Task) {
143147
}
144148
}
145149

146-
// Futex to wait on until all tasks have finished scanning the stack.
147-
// This is basically a sync.WaitGroup.
148-
var scanDoneFutex Futex
150+
// scanWaitGroup is used to wait on until all threads have finished the current state transition.
151+
var scanWaitGroup waitGroup
152+
153+
type waitGroup struct {
154+
f Futex
155+
}
156+
157+
func initWaitGroup(n uint32) waitGroup {
158+
var wg waitGroup
159+
wg.f.Store(n)
160+
return wg
161+
}
162+
163+
func (wg *waitGroup) done() {
164+
if wg.f.Add(^uint32(0)) == 0 {
165+
wg.f.WakeAll()
166+
}
167+
}
168+
169+
func (wg *waitGroup) wait() {
170+
for {
171+
val := wg.f.Load()
172+
if val == 0 {
173+
return
174+
}
175+
wg.f.Wait(val)
176+
}
177+
}
178+
179+
// gcState is used to track and notify threads when the GC is stopping/resuming.
180+
var gcState Futex
181+
182+
const (
183+
gcStateResumed = iota
184+
gcStateStopped
185+
)
149186

150187
// GC scan phase. Because we need to stop the world while scanning, this kinda
151188
// needs to be done in the tasks package.
@@ -155,100 +192,99 @@ var scanDoneFutex Futex
155192
func GCStopWorldAndScan() {
156193
current := Current()
157194

158-
// Don't allow new goroutines to be started while pausing/resuming threads
159-
// in the stop-the-world phase.
160-
activeTaskLock.Lock()
195+
// NOTE: This does not need to be atomic.
196+
if gcState.Load() == gcStateResumed {
197+
// Don't allow new goroutines to be started while pausing/resuming threads
198+
// in the stop-the-world phase.
199+
activeTaskLock.Lock()
161200

162-
// Pause all other threads.
163-
numOtherThreads := uint32(0)
164-
for t := activeTasks; t != nil; t = t.state.QueueNext {
165-
if t != current {
166-
numOtherThreads++
167-
tinygo_task_send_gc_signal(t.state.thread)
168-
}
169-
}
201+
// Wait for threads to finish resuming.
202+
scanWaitGroup.wait()
170203

171-
// Store the number of threads to wait for in the futex.
172-
// This is the equivalent of doing an initial wg.Add(numOtherThreads).
173-
scanDoneFutex.Store(numOtherThreads)
204+
// Change the gc state to stopped.
205+
// NOTE: This does not need to be atomic.
206+
gcState.Store(gcStateStopped)
174207

175-
// Scan the current stack, and all current registers.
176-
scanCurrentStack()
208+
// Set the number of threads to wait for.
209+
scanWaitGroup = initWaitGroup(otherGoroutines)
177210

178-
// Wake each paused thread for the first time so it will scan the stack.
179-
for t := activeTasks; t != nil; t = t.state.QueueNext {
180-
if t != current {
181-
t.state.gcSem.Post()
211+
// Pause all other threads.
212+
for t := activeTasks; t != nil; t = t.state.QueueNext {
213+
if t != current {
214+
tinygo_task_send_gc_signal(t.state.thread)
215+
}
182216
}
217+
218+
// Wait for the threads to finish stopping.
219+
scanWaitGroup.wait()
183220
}
184221

185-
// Wait until all threads have finished scanning their stack.
186-
// This is the equivalent of wg.Wait()
187-
for {
188-
val := scanDoneFutex.Load()
189-
if val == 0 {
190-
break
222+
// Scan other thread stacks.
223+
for t := activeTasks; t != nil; t = t.state.QueueNext {
224+
if t != current {
225+
markRoots(t.state.stackBottom, t.state.stackTop)
191226
}
192-
scanDoneFutex.Wait(val)
193227
}
194228

229+
// Scan the current stack, and all current registers.
230+
scanCurrentStack()
231+
195232
// Scan all globals (implemented in the runtime).
196233
gcScanGlobals()
197234
}
198235

199236
// After the GC is done scanning, resume all other threads.
200-
//
201-
// This must only be called after a GCStopWorldAndScan call.
202237
func GCResumeWorld() {
203-
current := Current()
204-
205-
// Wake each paused thread for the second time, so they will resume normal
206-
// operation.
207-
for t := activeTasks; t != nil; t = t.state.QueueNext {
208-
if t != current {
209-
t.state.gcSem.Post()
210-
}
238+
// NOTE: This does not need to be atomic.
239+
if gcState.Load() == gcStateResumed {
240+
// This is already resumed.
241+
return
211242
}
212243

244+
// Set the wait group to track resume progress.
245+
scanWaitGroup = initWaitGroup(otherGoroutines)
246+
247+
// Set the state to resumed.
248+
gcState.Store(gcStateResumed)
249+
250+
// Wake all of the stopped threads.
251+
gcState.WakeAll()
252+
213253
// Allow goroutines to start and exit again.
214254
activeTaskLock.Unlock()
215255
}
216256

257+
//go:linkname markRoots runtime.markRoots
258+
func markRoots(start, end uintptr)
259+
217260
// Scan globals, implemented in the runtime package.
218261
func gcScanGlobals()
219262

220263
var stackScanLock PMutex
221264

222265
//export tinygo_task_gc_pause
223266
func tingyo_task_gc_pause(sig int32) {
224-
// Wait until we get the signal to start scanning the stack.
225-
Current().state.gcSem.Wait()
226-
227-
// Scan the thread stack.
228-
// Only scan a single thread stack at a time, because the GC marking phase
229-
// doesn't support parallelism.
230-
// TODO: it may be possible to call markRoots directly (without saving
231-
// registers) since we are in a signal handler that already saved a bunch of
232-
// registers. This is an optimization left for a future time.
233-
stackScanLock.Lock()
234-
scanCurrentStack()
235-
stackScanLock.Unlock()
267+
// Write the entrty stack pointer to the state.
268+
Current().state.stackBottom = uintptr(stacksave())
269+
270+
// Notify the GC that we are stopped.
271+
scanWaitGroup.done()
236272

237-
// Equivalent of wg.Done(): subtract one from the futex and if the result is
238-
// 0 (meaning we were the last in the waitgroup), wake the waiting thread.
239-
n := uint32(1)
240-
if scanDoneFutex.Add(-n) == 0 {
241-
scanDoneFutex.Wake()
273+
// Wait for the GC to resume.
274+
for gcState.Load() == gcStateStopped {
275+
gcState.Wait(gcStateStopped)
242276
}
243277

244-
// Wait until we get the signal we can resume normally (after the mark phase
245-
// has finished).
246-
Current().state.gcSem.Wait()
278+
// Notify the GC that we have resumed.
279+
scanWaitGroup.done()
247280
}
248281

249282
//go:export tinygo_scanCurrentStack
250283
func scanCurrentStack()
251284

285+
//go:linkname stacksave runtime.stacksave
286+
func stacksave() unsafe.Pointer
287+
252288
// Return the highest address of the current stack.
253289
func StackTop() uintptr {
254290
return Current().state.stackTop

src/runtime/gc_boehm.go

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,6 @@ var zeroSizedAlloc uint8
3131

3232
var gcLock task.PMutex
3333

34-
// Normally false, set to true during a GC scan when all other threads get
35-
// paused.
36-
var needsResumeWorld bool
37-
3834
func initHeap() {
3935
libgc_init()
4036

@@ -48,20 +44,8 @@ func gcInit()
4844

4945
//export tinygo_runtime_bdwgc_callback
5046
func gcCallback() {
51-
if hasParallelism && needsResumeWorld {
52-
// Should never happen, check for it anyway.
53-
runtimePanic("gc: world already stopped")
54-
}
55-
5647
// Mark globals and all stacks, and stop the world if we're using threading.
5748
gcMarkReachable()
58-
59-
// If we use a scheduler with parallelism (the threads scheduler for
60-
// example), we need to call gcResumeWorld() after scanning has finished.
61-
if hasParallelism {
62-
// Note that we need to resume the world after finishing the GC call.
63-
needsResumeWorld = true
64-
}
6549
}
6650

6751
func markRoots(start, end uintptr) {
@@ -87,7 +71,6 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer {
8771
}
8872

8973
gcLock.Lock()
90-
needsResumeWorld = false
9174
var ptr unsafe.Pointer
9275
if layout == gclayout.NoPtrs.AsPtr() {
9376
// This object is entirely pointer free, for example make([]int, ...).
@@ -104,9 +87,7 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer {
10487
// Memory returned from libgc_malloc has already been zeroed, so nothing
10588
// to do here.
10689
}
107-
if needsResumeWorld {
108-
gcResumeWorld()
109-
}
90+
gcResumeWorld()
11091
gcLock.Unlock()
11192
if ptr == nil {
11293
runtimePanic("gc: out of memory")
@@ -121,11 +102,8 @@ func free(ptr unsafe.Pointer) {
121102

122103
func GC() {
123104
gcLock.Lock()
124-
needsResumeWorld = false
125105
libgc_gcollect()
126-
if needsResumeWorld {
127-
gcResumeWorld()
128-
}
106+
gcResumeWorld()
129107
gcLock.Unlock()
130108
}
131109

0 commit comments

Comments
 (0)