diff --git a/builder/sizes_test.go b/builder/sizes_test.go index c1d44f443d..d9a1de0a4b 100644 --- a/builder/sizes_test.go +++ b/builder/sizes_test.go @@ -42,9 +42,9 @@ func TestBinarySize(t *testing.T) { // This is a small number of very diverse targets that we want to test. tests := []sizeTest{ // microcontrollers - {"hifive1b", "examples/echo", 3896, 280, 0, 2268}, - {"microbit", "examples/serial", 2860, 360, 8, 2272}, - {"wioterminal", "examples/pininterrupt", 7361, 1491, 116, 6912}, + {"hifive1b", "examples/echo", 3848, 296, 0, 2268}, + {"microbit", "examples/serial", 2844, 376, 8, 2272}, + {"wioterminal", "examples/pininterrupt", 7301, 1507, 116, 6912}, // TODO: also check wasm. Right now this is difficult, because // wasm binaries are run through wasm-opt and therefore the diff --git a/src/runtime/gc_blocks.go b/src/runtime/gc_blocks.go index 408656b258..e7ca5bdcc6 100644 --- a/src/runtime/gc_blocks.go +++ b/src/runtime/gc_blocks.go @@ -71,19 +71,20 @@ var zeroSizedAlloc uint8 type blockState uint8 const ( - blockStateFree blockState = 0 // 00 - blockStateHead blockState = 1 // 01 - blockStateTail blockState = 2 // 10 - blockStateMark blockState = 3 // 11 - blockStateMask blockState = 3 // 11 + blockStateLow blockState = 1 + blockStateHigh blockState = 1 << blocksPerStateByte + + blockStateFree blockState = 0 + blockStateHead blockState = blockStateLow + blockStateTail blockState = blockStateHigh + blockStateMark blockState = blockStateLow | blockStateHigh + blockStateMask blockState = blockStateLow | blockStateHigh ) +const blockStateEach = 1<>((b%blocksPerStateByte)*stateBits)) & blockStateMask + return blockState(stateByte>>(b%blocksPerStateByte)) & blockStateMask } // State returns the current block state. @@ -193,38 +194,12 @@ func (b gcBlock) state() blockState { // from head to mark. 
func (b gcBlock) setState(newState blockState) { stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) - *stateBytePtr |= uint8(newState << ((b % blocksPerStateByte) * stateBits)) + *stateBytePtr |= uint8(newState << (b % blocksPerStateByte)) if gcAsserts && b.state() != newState { runtimePanic("gc: setState() was not successful") } } -// markFree sets the block state to free, no matter what state it was in before. -func (b gcBlock) markFree() { - stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) - *stateBytePtr &^= uint8(blockStateMask << ((b % blocksPerStateByte) * stateBits)) - if gcAsserts && b.state() != blockStateFree { - runtimePanic("gc: markFree() was not successful") - } - if gcAsserts { - *(*[wordsPerBlock]uintptr)(unsafe.Pointer(b.address())) = [wordsPerBlock]uintptr{} - } -} - -// unmark changes the state of the block from mark to head. It must be marked -// before calling this function. -func (b gcBlock) unmark() { - if gcAsserts && b.state() != blockStateMark { - runtimePanic("gc: unmark() on a block that is not marked") - } - clearMask := blockStateMask ^ blockStateHead // the bits to clear from the state - stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) - *stateBytePtr &^= uint8(clearMask << ((b % blocksPerStateByte) * stateBits)) - if gcAsserts && b.state() != blockStateHead { - runtimePanic("gc: unmark() was not successful") - } -} - func isOnHeap(ptr uintptr) bool { return ptr >= heapStart && ptr < uintptr(metadataStart) } @@ -685,36 +660,69 @@ func markRoot(addr, root uintptr) { // Sweep goes through all memory and frees unmarked memory. // It returns how many bytes are free in the heap after the sweep. func sweep() (freeBytes uintptr) { - freeCurrentObject := false - var freed uint64 - for block := gcBlock(0); block < endBlock; block++ { - switch block.state() { - case blockStateHead: - // Unmarked head. Free it, including all tail blocks following it. 
- block.markFree() - freeCurrentObject = true - gcFrees++ - freed++ - case blockStateTail: - if freeCurrentObject { - // This is a tail object following an unmarked head. - // Free it now. - block.markFree() - freed++ - } - case blockStateMark: - // This is a marked object. The next tail blocks must not be freed, - // but the mark bit must be removed so the next GC cycle will - // collect this object if it is unreferenced then. - block.unmark() - freeCurrentObject = false - case blockStateFree: - freeBytes += bytesPerBlock - } - } - gcFreedBlocks += freed - freeBytes += uintptr(freed) * bytesPerBlock - return + endBlock := endBlock + metadataEnd := unsafe.Add(metadataStart, (endBlock+(blocksPerStateByte-1))/blocksPerStateByte) + var oldFreeBlocks, freedHeads, freedTails uintptr + var carry byte + // Pre-subtract the blocks that do not actually exist from oldFreeBlocks. + oldFreeBlocks -= (blocksPerStateByte - 1) - uintptr(endBlock+(blocksPerStateByte-1))%blocksPerStateByte + for meta := metadataStart; meta != metadataEnd; meta = unsafe.Add(meta, 1) { + // Fetch the state byte. + stateBytePtr := (*byte)(unsafe.Pointer(meta)) + stateByte := *stateBytePtr + + // Count existing free blocks in the state byte. + lowState := stateByte & blockStateEach + highState := stateByte >> blocksPerStateByte + freeBlocks := lowState | highState + oldFreeBlocks += uintptr(count4LUT[freeBlocks]) + + // Count unmarked heads in the state byte. + unmarkedHeads := lowState &^ highState + freedHeads += uintptr(count4LUT[unmarkedHeads]) + + // Identify and separate live and free tails. + // Adding 1 to a run of bits will clear the run. + // We can use this to clear tails after a freed head. + tails := highState &^ lowState + tailClear := tails + (unmarkedHeads << 1) + carry + carry = tailClear >> blocksPerStateByte + freedTails += uintptr(count4LUT[tails&^tailClear]) + tails &= tailClear + + // Construct the new state byte.
+ markedHeads := highState & lowState + *stateBytePtr = markedHeads | (tails << blocksPerStateByte) + } + + // Update the GC metrics. + gcFrees += uint64(freedHeads) + freedBlocks := freedHeads + freedTails + gcFreedBlocks += uint64(freedBlocks) + freeBlocks := oldFreeBlocks + freedBlocks + + return freeBlocks * bytesPerBlock +} + +// count4LUT is a lookup table used to count set bits in a 4-bit mask. +// TODO: replace with popcnt when available +var count4LUT = [16]uint8{ + 0b0000: 0, + 0b0001: 1, + 0b0010: 1, + 0b0011: 2, + 0b0100: 1, + 0b0101: 2, + 0b0110: 2, + 0b0111: 3, + 0b1000: 1, + 0b1001: 2, + 0b1010: 2, + 0b1011: 3, + 0b1100: 2, + 0b1101: 3, + 0b1110: 3, + 0b1111: 4, } // dumpHeap can be used for debugging purposes. It dumps the state of each heap