Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions transform/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ func Optimize(mod llvm.Module, config *compileopts.Config) []error {

// Run TinyGo-specific optimization passes.
OptimizeStringToBytes(mod)
OptimizeStringFromBytes(mod)
OptimizeReflectImplements(mod)
maxStackSize := config.MaxStackAlloc()
OptimizeAllocs(mod, nil, maxStackSize, nil)
Expand All @@ -91,6 +92,7 @@ func Optimize(mod llvm.Module, config *compileopts.Config) []error {
fmt.Fprintln(os.Stderr, pos.String()+": "+msg)
})
OptimizeStringToBytes(mod)
OptimizeStringFromBytes(mod)
OptimizeStringEqual(mod)

} else {
Expand Down
105 changes: 105 additions & 0 deletions transform/rtcalls.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,108 @@ func OptimizeReflectImplements(mod llvm.Module) {
call.EraseFromParentAsInstruction()
}
}

// OptimizeStringFromBytes removes runtime.stringFromBytes calls whose result
// is only used for string equality checks, so that the byte slice storage is
// compared directly instead of going through a temporary string. In
// particular, `bytes.Equal` creates two such temporary strings:
//
//	func Equal(a, b []byte) bool {
//		return string(a) == string(b)
//	}
//
// The pass does nothing when the module has no runtime.stringFromBytes or no
// runtime.stringEqual function. A call is only removed when every recorded use
// of its result is reached by walking forward from the call, and every
// instruction from the call up to and including the last such use is a call
// to runtime.stringEqual, a getelementptr, a load or an extractvalue. String
// length uses are rewritten to the slice length by collectStringFromBytesUses
// even when the call itself is kept.
func OptimizeStringFromBytes(mod llvm.Module) {
// Both runtime functions must be present in the module; otherwise there is
// nothing to match against.
stringFromBytes := mod.NamedFunction("runtime.stringFromBytes")
if stringFromBytes.IsNil() {
return
}
stringEqual := mod.NamedFunction("runtime.stringEqual")
if stringEqual.IsNil() {
return
}
Comment on lines +194 to +197
Copy link
Member

@aykevl aykevl Oct 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be generalized to any function:

  • That has a function attribute like memory(argmem: read), memory(read), etc (in other words, doesn't modify memory). This should be automatically deduced for runtime.stringEqual but if not we can add it to compiler/symbol.go.
  • Where the string pointer parameter has the parameter attribute nocapture (meaning the pointer parameter is not kept across the function call).

For details, see: https://llvm.org/docs/LangRef.html#fnattrs and https://llvm.org/docs/LangRef.html#paramattrs.


// The label `uses` and the map `uses` declared below are distinct: labels
// live in their own namespace. `continue uses` abandons the current
// stringFromBytes call and moves on to the next one.
uses:
for _, call := range getUses(stringFromBytes) {
// The first two operands of the call are the slice pointer and the slice
// length; they stand in for the string pointer and string length.
sliceptr := call.Operand(0)
slicelen := call.Operand(1)
// Collect all uses of the string pointer while replacing
// uses of the string length. The map value records whether the use
// should later be replaced with the slice pointer (true) or merely has
// to be accounted for during the scan (false).
uses := make(map[llvm.Value]bool)
if !collectStringFromBytesUses(uses, slicelen, call) {
// Some use has an unsupported shape. Length uses rewritten so far
// stay rewritten.
continue
}
inst := call
found := 0
// Scan instructions that follow the stringFromBytes call to
// account for all uses. Bail if any instruction may mutate the
// slice storage. The loop ends once every recorded use has been seen.
for len(uses) > found {
inst = llvm.NextInstruction(inst)
if inst.IsNil() {
// There are uses beyond this basic block.
continue uses
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this to be hard to read. Can you refactor this code to avoid the continue uses? Perhaps by moving some of it to a separate function?

}
switch {
case !inst.IsACallInst().IsNil():
// This check also applies to calls recorded in the map: a recorded
// call that is not runtime.stringEqual aborts the optimization.
if inst.CalledValue() != stringEqual {
// The called function is not runtime.stringEqual
// and may mutate the slice.
continue uses
}
case !inst.IsAGetElementPtrInst().IsNil(),
!inst.IsALoadInst().IsNil(),
!inst.IsAExtractValueInst().IsNil():
// Read-only instructions.
default:
// Instruction may perform a store on the slice.
continue uses
}
// Count the instruction if it is one of the recorded uses.
if _, ok := uses[inst]; ok {
found++
}
}
// At this point, all instructions between the stringFromBytes call
// and its uses are known not to mutate the slice storage. Replace
// all string pointer uses with the slice pointer and get rid of
// the call.
for use, repl := range uses {
// Only entries marked true are rewritten to the slice pointer and
// erased; entries marked false are left in place.
if repl {
use.ReplaceAllUsesWith(sliceptr)
use.EraseFromParentAsInstruction()
}
}
call.EraseFromParentAsInstruction()
}
}

// collectStringFromBytesUses walks the uses of v (a runtime.stringFromBytes
// call, or a pointer extracted from its result) and records them in uses.
//
// Integer-typed extractvalue uses are treated as the string length: they are
// immediately replaced with slicelen and erased, regardless of what this
// function ends up returning. Pointer-typed extractvalue uses are recursed
// into and then recorded with the value true, meaning the caller may replace
// them with the slice pointer. Call instructions are recorded with the value
// false, meaning they only have to be accounted for by the caller.
//
// The result is false as soon as a use of any other shape is encountered, and
// true otherwise (including when v is nil).
func collectStringFromBytesUses(uses map[llvm.Value]bool, slicelen, v llvm.Value) bool {
	if v.IsNil() {
		return true
	}
	for _, user := range getUses(v) {
		if user.IsAExtractValueInst().IsNil() {
			// Not an extractvalue: only plain calls are tolerated.
			if user.IsACallInst().IsNil() {
				return false
			}
			// Remember the call without marking it for replacement.
			uses[user] = false
			continue
		}

		kind := user.Type().TypeKind()
		if kind == llvm.IntegerTypeKind {
			// The string length is interchangeable with the slice length.
			user.ReplaceAllUsesWith(slicelen)
			user.EraseFromParentAsInstruction()
			continue
		}
		if kind != llvm.PointerTypeKind {
			return false
		}

		// Follow the extracted pointer to its own uses first, then mark
		// the extractvalue itself as replaceable with the slice pointer.
		if !collectStringFromBytesUses(uses, slicelen, user) {
			return false
		}
		uses[user] = true
	}
	return true
}
8 changes: 8 additions & 0 deletions transform/rtcalls_test.go
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be really nice to have some tests that are easier to read than LLVM IR. Could you add some tests like transform/testdata/allocs2.go? (This will probably require some refactoring to not duplicate code). This will also show that the optimization works for real Go code, instead of handcrafted IR (that might go out of date).

Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,11 @@ func TestOptimizeReflectImplements(t *testing.T) {
transform.OptimizeReflectImplements(mod)
})
}

func TestOptimizeBytesFromString(t *testing.T) {
t.Parallel()
testTransform(t, "testdata/stringfrombytes", func(mod llvm.Module) {
// Run optimization pass.
transform.OptimizeStringFromBytes(mod)
})
}
47 changes: 47 additions & 0 deletions transform/testdata/stringfrombytes.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64--linux"

; Six-byte constant used as the source data for the byte slice built in
; @testReadOnly.
@str = constant [6 x i8] c"foobar"

; Returns a three-field aggregate; @testReadOnly extracts all three fields
; (presumably pointer, length and capacity of the slice).
declare { ptr, i64, i64 } @runtime.stringToBytes(ptr, i64)

; The function whose calls the pass under test tries to remove.
; NOTE(review): declared as returning { ptr, i64 }, but the call sites in
; @testReadOnly use the type { ptr, i64, i64 } - confirm which is intended.
declare { ptr, i64 } @runtime.stringFromBytes(ptr, i64, i64)

; String comparison taking two (pointer, length) pairs; both pointers are
; marked nocapture.
declare i1 @runtime.stringEqual(ptr nocapture, i64, ptr nocapture, i64)

; Opaque function with no attributes, used as a call that is not
; runtime.stringEqual.
declare void @maybeSideEffect()

; Takes a (pointer, length) pair with a nocapture pointer; used as a
; non-stringEqual use of the string.
declare void @readString(ptr nocapture, i64)

; Exercises three runtime.stringFromBytes calls on the same byte slice: one
; that is only compared, one with an unrelated call before the comparison,
; and one whose string is passed to another function after the comparison.
define void @testReadOnly() {
entry:
; Build byte slice: %1, %2 and %3 are the three fields of the result.
%0 = call fastcc { ptr, i64, i64 } @runtime.stringToBytes(ptr @str, i64 6)
%1 = extractvalue { ptr, i64, i64 } %0, 0
%2 = extractvalue { ptr, i64, i64 } %0, 1
%3 = extractvalue { ptr, i64, i64 } %0, 2

; Case 1: test that a side-effect free string equality check allows the
; stringFromBytes call to be optimized away.
%4 = call fastcc { ptr, i64, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3)
%5 = extractvalue { ptr, i64, i64 } %4, 0
%6 = extractvalue { ptr, i64, i64 } %4, 1
call fastcc i1 @runtime.stringEqual(ptr %5, i64 %6, ptr %5, i64 %6)

; Case 2: compare it again, but with an intervening call that blocks the
; optimization.
%9 = call fastcc { ptr, i64, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3)
%10 = extractvalue { ptr, i64, i64 } %9, 0
%11 = extractvalue { ptr, i64, i64 } %9, 1
; Function call may write to the slice storage.
call fastcc void @maybeSideEffect()
call fastcc i1 @runtime.stringEqual(ptr %10, i64 %11, ptr %10, i64 %11)

; Case 3: reading the string after comparing should also defeat the
; optimization, because @readString is not runtime.stringEqual.
%13 = call fastcc { ptr, i64, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3)
%14 = extractvalue { ptr, i64, i64 } %13, 0
%15 = extractvalue { ptr, i64, i64 } %13, 1
call fastcc i1 @runtime.stringEqual(ptr %14, i64 %15, ptr %14, i64 %15)
call fastcc void @readString(ptr %14, i64 %15)
ret void
}

32 changes: 32 additions & 0 deletions transform/testdata/stringfrombytes.out.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64--linux"

; Six-byte constant used as the source data for the byte slice built in
; @testReadOnly.
@str = constant [6 x i8] c"foobar"

; Returns a three-field aggregate; @testReadOnly extracts all three fields.
declare { ptr, i64, i64 } @runtime.stringToBytes(ptr, i64)

; NOTE(review): declared as returning { ptr, i64 }, but the remaining call
; sites in @testReadOnly use the type { ptr, i64, i64 } - confirm which is
; intended.
declare { ptr, i64 } @runtime.stringFromBytes(ptr, i64, i64)

; String comparison taking two (pointer, length) pairs; both pointers are
; marked nocapture.
declare i1 @runtime.stringEqual(ptr nocapture, i64, ptr nocapture, i64)

; Opaque function with no attributes.
declare void @maybeSideEffect()

; Takes a (pointer, length) pair with a nocapture pointer.
declare void @readString(ptr nocapture, i64)

; Expected result for @testReadOnly after the pass has run.
define void @testReadOnly() {
entry:
; The byte slice construction is unchanged.
%0 = call fastcc { ptr, i64, i64 } @runtime.stringToBytes(ptr @str, i64 6)
%1 = extractvalue { ptr, i64, i64 } %0, 0
%2 = extractvalue { ptr, i64, i64 } %0, 1
%3 = extractvalue { ptr, i64, i64 } %0, 2
; Case 1: the stringFromBytes call is gone; the comparison reads the slice
; pointer %1 and slice length %2 directly.
%4 = call fastcc i1 @runtime.stringEqual(ptr %1, i64 %2, ptr %1, i64 %2)
; Case 2: the stringFromBytes call is kept because of the intervening call,
; but the string length has still been replaced with the slice length %2.
%5 = call fastcc { ptr, i64, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3)
%6 = extractvalue { ptr, i64, i64 } %5, 0
call fastcc void @maybeSideEffect()
%7 = call fastcc i1 @runtime.stringEqual(ptr %6, i64 %2, ptr %6, i64 %2)
; Case 3: the stringFromBytes call is kept because the string is also passed
; to @readString; again only the length has been replaced with %2.
%8 = call fastcc { ptr, i64, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3)
%9 = extractvalue { ptr, i64, i64 } %8, 0
%10 = call fastcc i1 @runtime.stringEqual(ptr %9, i64 %2, ptr %9, i64 %2)
call fastcc void @readString(ptr %9, i64 %2)
ret void
}
Loading