From ce7ffc036d03740825a8a12965996711a87362ea Mon Sep 17 00:00:00 2001 From: Nia Waldvogel Date: Sun, 30 Nov 2025 22:17:57 -0500 Subject: [PATCH] runtime (gc_blocks.go): use best-fit allocation The allocator originally just looped through the blocks until it found a sufficiently long range. This is simple, but it fragments very easily and can degrade to a full heap scan for long requests. Instead, we now maintain a sorted nested list of free ranges by size. The allocator will select the shortest sufficient-length range, generally reducing fragmentation. This data structure can find a range in time at most proportional to the requested length. --- builder/sizes_test.go | 6 +- src/runtime/gc_blocks.go | 277 +++++++++++++++++++++++++++------------ 2 files changed, 196 insertions(+), 87 deletions(-) diff --git a/builder/sizes_test.go b/builder/sizes_test.go index c1d44f443d..3cc7242fa2 100644 --- a/builder/sizes_test.go +++ b/builder/sizes_test.go @@ -42,9 +42,9 @@ func TestBinarySize(t *testing.T) { // This is a small number of very diverse targets that we want to test. tests := []sizeTest{ // microcontrollers - {"hifive1b", "examples/echo", 3896, 280, 0, 2268}, - {"microbit", "examples/serial", 2860, 360, 8, 2272}, - {"wioterminal", "examples/pininterrupt", 7361, 1491, 116, 6912}, + {"hifive1b", "examples/echo", 4132, 280, 0, 2268}, + {"microbit", "examples/serial", 3024, 360, 8, 2272}, + {"wioterminal", "examples/pininterrupt", 7537, 1491, 116, 6912}, // TODO: also check wasm. 
Right now this is difficult, because // wasm binaries are run through wasm-opt and therefore the diff --git a/src/runtime/gc_blocks.go b/src/runtime/gc_blocks.go index 408656b258..f44f01bc32 100644 --- a/src/runtime/gc_blocks.go +++ b/src/runtime/gc_blocks.go @@ -51,7 +51,7 @@ const ( var ( metadataStart unsafe.Pointer // pointer to the start of the heap metadata - nextAlloc gcBlock // the next block that should be tried by the allocator + freeRanges *freeRange // freeRanges is a linked list of free block ranges endBlock gcBlock // the block just past the end of the available space gcTotalAlloc uint64 // total number of bytes allocated gcTotalBlocks uint64 // total number of allocated blocks @@ -225,6 +225,99 @@ func (b gcBlock) unmark() { } } +// freeRange is a node on the outer list of range lengths. +// The free ranges are structured as two nested singly-linked lists: +// - The outer level (freeRange) has one entry for each unique range length. +// - The inner level (freeRangeMore) has one entry for each additional range of the same length. +// This two-level structure ensures that insertion/removal times are proportional to the requested length. +type freeRange struct { + // len is the length of this free range. + len uintptr + + // nextLen is the next longer free range. + nextLen *freeRange + + // nextWithLen is the next free range with this length. + nextWithLen *freeRangeMore +} + +// freeRangeMore is a node on the inner list of equal-length ranges. +type freeRangeMore struct { + next *freeRangeMore +} + +// insertFreeRange inserts a range of len blocks starting at ptr into the free list. +func insertFreeRange(ptr unsafe.Pointer, len uintptr) { + if gcAsserts && len == 0 { + runtimePanic("gc: insert 0-length free range") + } + + // Find the insertion point by length. + // Skip until the next range is at least the target length. 
+ insDst := &freeRanges + for *insDst != nil && (*insDst).len < len { + insDst = &(*insDst).nextLen + } + + // Create the new free range. + next := *insDst + if next != nil && next.len == len { + // Insert into the list with this length. + newRange := (*freeRangeMore)(ptr) + newRange.next = next.nextWithLen + next.nextWithLen = newRange + } else { + // Insert into the list of lengths. + newRange := (*freeRange)(ptr) + *newRange = freeRange{ + len: len, + nextLen: next, + nextWithLen: nil, + } + *insDst = newRange + } +} + +// popFreeRange removes a range of len blocks from the freeRanges list. +// It returns nil if there are no sufficiently long ranges. +func popFreeRange(len uintptr) unsafe.Pointer { + if gcAsserts && len == 0 { + runtimePanic("gc: pop 0-length free range") + } + + // Find the removal point by length. + // Skip until the next range is at least the target length. + remDst := &freeRanges + for *remDst != nil && (*remDst).len < len { + remDst = &(*remDst).nextLen + } + + rangeWithLength := *remDst + if rangeWithLength == nil { + // No ranges are long enough. + return nil + } + removedLen := rangeWithLength.len + + // Remove the range. + var ptr unsafe.Pointer + if nextWithLen := rangeWithLength.nextWithLen; nextWithLen != nil { + // Remove from the list with this length. + rangeWithLength.nextWithLen = nextWithLen.next + ptr = unsafe.Pointer(nextWithLen) + } else { + // Remove from the list of lengths. + *remDst = rangeWithLength.nextLen + ptr = unsafe.Pointer(rangeWithLength) + } + + if removedLen > len { + // Insert the leftover range. + insertFreeRange(unsafe.Add(ptr, len*bytesPerBlock), removedLen-len) + } + return ptr +} + func isOnHeap(ptr uintptr) bool { return ptr >= heapStart && ptr < uintptr(metadataStart) } @@ -239,6 +332,9 @@ func initHeap() { // Set all block states to 'free'. metadataSize := heapEnd - uintptr(metadataStart) memzero(unsafe.Pointer(metadataStart), metadataSize) + + // Rebuild the free ranges list. 
+ buildFreeRanges() } // setHeapEnd is called to expand the heap. The heap can only grow, not shrink. @@ -270,6 +366,9 @@ func setHeapEnd(newHeapEnd uintptr) { if gcAsserts && uintptr(metadataStart) < uintptr(oldMetadataStart)+oldMetadataSize { runtimePanic("gc: heap did not grow enough at once") } + + // Rebuild the free ranges list. + buildFreeRanges() } // calculateHeapAddresses initializes variables such as metadataStart and @@ -338,100 +437,67 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { gcMallocs++ gcTotalBlocks += uint64(neededBlocks) - // Continue looping until a run of free blocks has been found that fits the - // requested size. - index := nextAlloc - numFreeBlocks := uintptr(0) - heapScanCount := uint8(0) + // Acquire a range of free blocks. + var ranGC bool + var grewHeap bool + var pointer unsafe.Pointer for { - if index == nextAlloc { - if heapScanCount == 0 { - heapScanCount = 1 - } else if heapScanCount == 1 { - // The entire heap has been searched for free memory, but none - // could be found. Run a garbage collection cycle to reclaim - // free memory and try again. - heapScanCount = 2 - freeBytes := runGC() - heapSize := uintptr(metadataStart) - heapStart - if freeBytes < heapSize/3 { - // Ensure there is at least 33% headroom. - // This percentage was arbitrarily chosen, and may need to - // be tuned in the future. - growHeap() - } - } else { - // Even after garbage collection, no free memory could be found. - // Try to increase heap size. - if growHeap() { - // Success, the heap was increased in size. Try again with a - // larger heap. - } else { - // Unfortunately the heap could not be increased. This - // happens on baremetal systems for example (where all - // available RAM has already been dedicated to the heap). - runtimePanicAt(returnAddress(0), "out of memory") - } - } + pointer = popFreeRange(neededBlocks) + if pointer != nil { + break } - // Wrap around the end of the heap. 
- if index == endBlock { - index = 0 - // Reset numFreeBlocks as allocations cannot wrap. - numFreeBlocks = 0 - // In rare cases, the initial heap might be so small that there are - // no blocks at all. In this case, it's better to jump back to the - // start of the loop and try again, until the GC realizes there is - // no memory and grows the heap. - // This can sometimes happen on WebAssembly, where the initial heap - // is created by whatever is left on the last memory page. + if !ranGC { + // Run the collector and try again. + freeBytes := runGC() + ranGC = true + heapSize := uintptr(metadataStart) - heapStart + if freeBytes < heapSize/3 { + // Ensure there is at least 33% headroom. + // This percentage was arbitrarily chosen, and may need to + // be tuned in the future. + growHeap() + } continue } - // Is the block we're looking at free? - if index.state() != blockStateFree { - // This block is in use. Try again from this point. - numFreeBlocks = 0 - index++ + if gcDebug && !grewHeap { + println("grow heap for request:", uint(neededBlocks)) + dumpFreeRangeCounts() + } + if growHeap() { + grewHeap = true continue } - numFreeBlocks++ - index++ - - // Are we finished? - if numFreeBlocks == neededBlocks { - // Found a big enough range of free blocks! - nextAlloc = index - thisAlloc := index - gcBlock(neededBlocks) - if gcDebug { - println("found memory:", thisAlloc.pointer(), int(size)) - } - // Set the following blocks as being allocated. - thisAlloc.setState(blockStateHead) - for i := thisAlloc + 1; i != nextAlloc; i++ { - i.setState(blockStateTail) - } + // Unfortunately the heap could not be increased. This + // happens on baremetal systems for example (where all + // available RAM has already been dedicated to the heap). + runtimePanicAt(returnAddress(0), "out of memory") + } - // We've claimed this allocation, now we can unlock the heap. - gcLock.Unlock() - - // Return a pointer to this allocation. 
- pointer := thisAlloc.pointer() - if preciseHeap { - // Store the object layout at the start of the object. - // TODO: this wastes a little bit of space on systems with - // larger-than-pointer alignment requirements. - *(*unsafe.Pointer)(pointer) = layout - add := align(unsafe.Sizeof(layout)) - pointer = unsafe.Add(pointer, add) - size -= add - } - memzero(pointer, size) - return pointer - } + // Set the backing blocks as being allocated. + block := blockFromAddr(uintptr(pointer)) + block.setState(blockStateHead) + for i := block + 1; i != block+gcBlock(neededBlocks); i++ { + i.setState(blockStateTail) } + + // We've claimed this allocation, now we can unlock the heap. + gcLock.Unlock() + + // Return a pointer to this allocation. + if preciseHeap { + // Store the object layout at the start of the object. + // TODO: this wastes a little bit of space on systems with + // larger-than-pointer alignment requirements. + *(*unsafe.Pointer)(pointer) = layout + add := align(unsafe.Sizeof(layout)) + pointer = unsafe.Add(pointer, add) + size -= add + } + memzero(pointer, size) + return pointer } func realloc(ptr unsafe.Pointer, size uintptr) unsafe.Pointer { @@ -518,6 +584,9 @@ func runGC() (freeBytes uintptr) { // the next collection cycle. freeBytes = sweep() + // Rebuild the free ranges list. + buildFreeRanges() + // Show how much has been sweeped, for debugging. if gcDebug { dumpHeap() @@ -717,6 +786,46 @@ func sweep() (freeBytes uintptr) { return } +// buildFreeRanges rebuilds the freeRanges list. +// This must be called after a GC sweep or heap grow. +func buildFreeRanges() { + freeRanges = nil + block := endBlock + for { + // Skip backwards over occupied blocks. + for block > 0 && (block-1).state() != blockStateFree { + block-- + } + if block == 0 { + break + } + + // Find the start of the free range. + end := block + for block > 0 && (block-1).state() == blockStateFree { + block-- + } + + // Insert the free range. 
+ insertFreeRange(block.pointer(), uintptr(end-block)) + } + + if gcDebug { + println("free ranges after rebuild:") + dumpFreeRangeCounts() + } +} + +func dumpFreeRangeCounts() { + for rangeWithLength := freeRanges; rangeWithLength != nil; rangeWithLength = rangeWithLength.nextLen { + totalRanges := uintptr(1) + for nextWithLen := rangeWithLength.nextWithLen; nextWithLen != nil; nextWithLen = nextWithLen.next { + totalRanges++ + } + println("-", uint(rangeWithLength.len), "x", uint(totalRanges)) + } +} + // dumpHeap can be used for debugging purposes. It dumps the state of each heap // block to standard output. func dumpHeap() {